diff --git a/tests/tpcdsbench/.gitignore b/tests/tpcdsbench/.gitignore new file mode 100644 index 000000000..98409bdb9 --- /dev/null +++ b/tests/tpcdsbench/.gitignore @@ -0,0 +1,11 @@ +*.swp +*~ +genqueries/* +gendata/* +work/* +log/* +*.o +dsdgen +dsqgen +bin/** +src/toolkit/* diff --git a/tests/tpcdsbench/ACKNOWLEDGEMENTS.md b/tests/tpcdsbench/ACKNOWLEDGEMENTS.md new file mode 100644 index 000000000..e3cd47ab0 --- /dev/null +++ b/tests/tpcdsbench/ACKNOWLEDGEMENTS.md @@ -0,0 +1,5 @@ +# Acknowledgements + +- Credit goes to [*Dilip Biswal*](https://github.com/dilipbiswal), [*Sunitha Kambhampati*](https://github.com/skambha) and [*Xin Wu*](https://github.com/xwu0226) for their implementation. +- Credit goes to [*Suresh Thalamati*](https://github.com/sureshthalamati) and [*Kevin Yu*](https://github.com/kevinyu98) for testing, reviewing and providing valuable feedback. + diff --git a/tests/tpcdsbench/CONTRIBUTING.md b/tests/tpcdsbench/CONTRIBUTING.md new file mode 100644 index 000000000..392662acb --- /dev/null +++ b/tests/tpcdsbench/CONTRIBUTING.md @@ -0,0 +1,59 @@ +## Contributing In General + +Our project welcomes external contributions! If you have an itch, please +feel free to scratch it. + +To contribute code or documentation, please submit a pull request to the [GitHub +repository](https://github.com/IBM/spark-tpc-ds-performance-test). + +A good way to familiarize yourself with the codebase and contribution process is +to look for and tackle low-hanging fruit in the [issue +tracker](https://github.com/IBM/spark-tpc-ds-performance-test/issues). Before embarking on +a more ambitious contribution, please quickly [get in touch](#communication) +with us. + +**We appreciate your effort, and want to avoid a situation where a contribution +requires extensive rework (by you or by us), sits in the queue for a long time, +or cannot be accepted at all!** + +### Proposing new features + +If you would like to implement a new feature, please [raise an +issue](https://github.com/IBM/spark-tpc-ds-performance-test/issues) before sending a pull +request so the feature can be discussed. This is to avoid you spending your +valuable time working on a feature that the project developers are not willing +to accept into the code base. + +### Fixing bugs + +If you would like to fix a bug, please [raise an +issue](https://github.com/IBM/spark-tpc-ds-performance-test/issues) before sending a pull +request so it can be discussed. If the fix is trivial or non controversial then +this is not usually necessary. + +### Merge approval + +The project maintainers use LGTM (Looks Good To Me) in comments on the code +review to indicate acceptance. A change requires LGTMs from two of the +maintainers of each component affected. + +For more details, see the [MAINTAINERS](MAINTAINERS.md) page. + +## Communication + +Please feel free to connect with us: [here](https://github.com/IBM/spark-tpc-ds-performance-test/issues) + +## Setup + +Please add any special setup instructions for your project to help the +developer become productive quickly. + +## Testing + +Please provide information that helps the developer test any changes they +make before submitting. + +## Coding style guidelines + +Beautiful code rocks! Please share any specific style guidelines you might +have for your project. 
diff --git a/tests/tpcdsbench/LICENSE b/tests/tpcdsbench/LICENSE new file mode 100644 index 000000000..8dada3eda --- /dev/null +++ b/tests/tpcdsbench/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/tests/tpcdsbench/MAINTAINERS.md b/tests/tpcdsbench/MAINTAINERS.md new file mode 100644 index 000000000..3bfcdc296 --- /dev/null +++ b/tests/tpcdsbench/MAINTAINERS.md @@ -0,0 +1,74 @@ +## Maintainers Guide + +This guide is intended for maintainers — anybody with commit access to one or +more Developer Journey repositories. + +## Methodology: + +A master branch. This branch MUST be releasable at all times. Commits and +merges against this branch MUST contain only bugfixes and/or security fixes. +Maintenance releases are tagged against master. + +A develop branch. This branch contains your proposed changes. + +The remainder of this document details how to merge pull requests to the +repositories. + +## Merge approval + +The project maintainers use LGTM (Looks Good To Me) in comments on the code +review to indicate acceptance. A change requires LGTMs from two of the members +of the [cda-journey-dev-admins](https://github.com/orgs/IBM/teams/cda-journey-dev-admins) +team. If the code is written by a member, the change only requires one more +LGTM. + +## Reviewing Pull Requests + +We recommend reviewing pull requests directly within GitHub. This allows a +public commentary on changes, providing transparency for all users. When +providing feedback, be civil, courteous, and kind. Disagreement is fine, so +long as the discourse is carried out politely. If we see a record of uncivil +or abusive comments, we will revoke your commit privileges and invite you to +leave the project. + +During your review, consider the following points: + +### Does the change have impact? + +While fixing typos is nice as it adds to the overall quality of the project, +merging one typo fix at a time can be a waste of effort. +(Merging many typo fixes because somebody reviewed the entire component, +however, is useful!) Other examples to be wary of: + +Changes in variable names. Ask whether or not the change will make +understanding the code easier, or if it could simply be a personal preference +on the part of the author. + +Essentially: feel free to close issues that do not have impact. + +### Do the changes make sense? + +If you do not understand what the changes are or what they accomplish, +ask the author for clarification. Ask the author to add comments and/or +clarify test case names to make the intentions clear. + +At times, such clarification will reveal that the author may not be using +the code correctly, or is unaware of features that accommodate their needs. +If you feel this is the case, work up a code sample that would address the +issue for them, and feel free to close the issue once they confirm. + +### Is this a new feature? If so: + +Does the issue contain narrative indicating the need for the feature? If not, +ask them to provide that information. Since the issue will be linked in the +changelog, this will often be a user's first introduction to it. + +Are new unit tests in place that test all new behaviors introduced? If not, do +not merge the feature until they are! +Is documentation in place for the new feature? (See the documentation +guidelines.) If not, do not merge the feature until it is! 
+Is the feature necessary for general use cases? Try to keep the scope of any +given component narrow. If a proposed feature does not fit that scope, +recommend to the user that they maintain the feature on their own, and close +the request. You may also recommend that they see if the feature gains traction +amongst other users, and suggest they re-submit when they can show such support. diff --git a/tests/tpcdsbench/README.md b/tests/tpcdsbench/README.md new file mode 100644 index 000000000..383f775d6 --- /dev/null +++ b/tests/tpcdsbench/README.md @@ -0,0 +1,398 @@ +# Explore Spark SQL and its performance using TPC-DS workload + +> Data Science Experience is now Watson Studio. Although some images in this code pattern may show the service as Data Science Experience, the steps and processes will still work. + +[Apache Spark](https://spark.apache.org) is a popular distributed data processing engine that is built around speed, ease of use and sophisticated analytics, with APIs in Java, Scala, Python, R, and SQL. Like other data processing engines, Spark has a unified optimization engine that computes the optimal way to execute a workload, with the main purpose of reducing disk I/O and CPU usage. + +We can evaluate and measure the performance of Spark SQL using the TPC-DS benchmark. [TPC-DS](http://www.tpc.org/tpcds) is a widely used industry standard decision support benchmark that is used to evaluate the performance of data processing engines. Given that TPC-DS exercises some key data warehouse features, running TPC-DS successfully reflects the readiness of Spark in terms of addressing the needs of a data warehouse application. Apache Spark v2.0 supports all ninety-nine decision support queries that are part of the TPC-DS benchmark. + +This Code Pattern is aimed at helping Spark developers quickly set up and run the TPC-DS benchmark in their own development setup. + +When the reader has completed this Code Pattern, they will understand the following: + +* How to set up the TPC-DS toolkit +* How to generate TPC-DS datasets at different scale factors +* How to create Spark database artifacts +* How to run TPC-DS benchmark queries on Spark in local mode and see the results +* Things to consider when increasing the data scale and running against a Spark cluster + +![Architecture diagram](doc/source/images/architecture.png) + +## Flow +* Commandline + 1. Create the Spark tables with the pre-generated dataset. + 2. Run the entire query set or a subset of queries and monitor the results. +* Notebook + 1. Create the Spark tables with the pre-generated dataset. + 2. Run the entire query set or an individual query. + 3. View the query results or performance summary. + 4. View the performance graph. + +## Included components + +* [Apache Spark](https://spark.apache.org/): An open-source, fast and general-purpose cluster computing system +* [Jupyter Notebook](https://jupyter.org/): An open-source web application that allows you to create and share documents that contain live code, equations, visualizations and explanatory text. + +## Featured technologies + +* [Data Science](https://medium.com/ibm-data-science-experience/): Systems and scientific methods to analyze +structured and unstructured data in order to extract knowledge and insights. +* [Artificial Intelligence](https://medium.com/ibm-data-science-experience): Artificial intelligence can be +applied to disparate solution spaces to deliver disruptive technologies. 
+* [Python](https://www.python.org/): Python is a programming language that lets you work more quickly +and integrate your systems more effectively. + +# Steps + +There are two modes of exercising this Code Pattern: +* Run locally using a simple interactive command line shell script. +* [Run using a Jupyter notebook in Watson Studio](#run-using-a-jupyter-notebook-in-watson-studio). + +## Run locally +1. [Clone the repository](#1-clone-the-repository) +2. [Setup development tools (Optional)](#2-setup-development-tools-optional) +3. [Install Spark](#3-install-spark) +4. [Run the script](#4-run-the-script) + +### 1. Clone the repository + +Clone the `spark-tpc-ds-performance-test` repo locally. In a terminal, run: + +``` +$ git clone https://github.com/IBM/spark-tpc-ds-performance-test +``` + +### 2. Setup development tools (Optional) + +Due to licensing restrictions, the TPCDS toolkit is not included as part of the code pattern. Instead, a pre-generated data set with a 1GB scale factor is +included in this pattern. If you want to work with a data set with a larger scale factor, or explore the full life cycle of setting up TPCDS, you can + download the toolkit from [TPC-DS](http://www.tpc.org/tpcds) and compile it in your development environment. + +Make sure the required development tools are installed on your platform. This Code Pattern is supported on Mac and Linux platforms only. Depending on your platform, run the following command to install the necessary development tools: + +* **Ubuntu:** +``` $ sudo apt-get install gcc make flex bison byacc git ``` +* **CentOS/RHEL:** +``` $ sudo yum install gcc make flex bison byacc git ``` +* **MacOS:** +``` $ xcode-select --install ``` + +To compile the toolkit, you need to do the following: + +``` +unzip <tpcds-toolkit.zip> +cd <toolkit-dir>/tools +make clean +make OS=<platform> +``` + +### 3. Install Spark + +To successfully run the TPC-DS tests, Spark must be installed and pre-configured to work with an [Apache Hive](https://hive.apache.org/) metastore. + +Perform one or more of the following options to ensure that Spark is installed and configured correctly. Once completed, modify ```bin/tpcdsenv.sh``` to set `SPARK_HOME` pointing to your Spark installation directory. + +**Option 1** - If you already have Spark installed, complete the following steps to ensure your Spark version is properly configured: + +``` +$ cd $SPARK_HOME +$ bin/spark-shell + + // Enter the following command at the scala prompt + scala> spark.conf + scala> spark.conf.get("spark.sql.catalogImplementation") + res5: String = hive + scala> +``` +*Note:* You must exit out of the spark-shell process or you will encounter errors when performing the TPC-DS tests. + +If the prompt returns `String = hive`, then your installation is properly configured. + +**Option 2** - If you don't have an installed Spark version, or your current installation is not properly configured, we suggest trying to pull down version 2.2.0 from the Spark [downloads page](https://spark.apache.org/downloads.html). This version should be configured to work with Apache Hive, but please run the test in the previous option to make sure. + +**Option 3** - The last option available is to download and build Spark yourself. The first step is to clone the Spark repo: +``` +$ git clone https://github.com/apache/spark.git +``` +Then build it using these [instructions](https://spark.apache.org/docs/latest/building-spark.html). Please make sure to build Spark with Hive support by following the `Building With Hive and JDBC Support` section. + +### 4. 
Run the script + +*Note:* Verify that the `bin/tpcdsenv.sh` script has `SPARK_HOME` set up correctly. + +Now that we have Spark set up and the TPC-DS scripts downloaded, we are ready to set up and start running the TPC-DS queries using the `bin/tpcdsspark.sh` utility script. This driver script will allow you to compile the TPC-DS toolkit to produce the data and the queries, and then run them to collect results. + +Perform the following steps to complete the execution of the script: + +``` + $ cd spark-tpc-ds-performance-test + $ bin/tpcdsspark.sh + +============================================== +TPC-DS On Spark Menu +---------------------------------------------- +SETUP + (1) Create spark tables +RUN + (2) Run a subset of TPC-DS queries + (3) Run All (99) TPC-DS Queries +CLEANUP + (4) Cleanup + (Q) Quit +---------------------------------------------- +Please enter your choice followed by [ENTER]: +``` + +#### Setup Option: "(1) - Create Spark Tables" + +This option creates the tables in the database specified by `TPCDS_DBNAME` defined in `bin/tpcdsenv.sh`. The default name is `TPCDS` but can be changed if needed. The created tables are based on the pre-generated data. + +The SQL statements to create the tables can be found in `src/ddl/individual`, and the tables are created in Parquet format for efficient processing. + +> Due to licensing restrictions, the TPCDS toolkit is not included as part of the code pattern. Instead, a pre-generated data set with a 1GB scale factor is + included in this pattern. If you want to work with a data set with a larger scale factor, or explore the full life cycle of setting up TPCDS, you can + download the toolkit from [TPC-DS](http://www.tpc.org/tpcds) and compile it in your development environment. Here are the instructions that describe how + to compile the toolkit and generate the data. + + +1. Compile the toolkit + + ``` + unzip <tpcds-toolkit.zip> + cd <toolkit-dir>/tools + make clean + make OS=<platform> + # (platform can be 'macos' or 'linux'). + ``` + +2. Generate the data. + + ``` + cd <repo-dir>/src/toolkit/tools + ./dsdgen -dir <data_gen_dir> -scale <scale_factor> -verbose y -terminate n + # data_gen_dir => The output directory where data will be generated at. + # scale_factor => The scale factor of data. + ``` + +3. Generate the queries. + + The `dsqgen` utility in the TPC-DS toolkit may be used to generate the queries. Appropriate options should be passed to this utility. A typical example of its usage is: + + ``` + cd <toolkit-dir>/tools + ./dsqgen -VERBOSE Y -DIALECT <dialect> -DIRECTORY <query_template_dir> -SCALE <scale_factor> -OUTPUT_DIR <output_dir> + ``` + +Below is example output for when this option is chosen. + +``` +============================================== +TPC-DS On Spark Menu +---------------------------------------------- +SETUP + (1) Create spark tables +RUN + (2) Run a subset of TPC-DS queries + (3) Run All (99) TPC-DS Queries +CLEANUP + (4) Cleanup + (Q) Quit +---------------------------------------------- +Please enter your choice followed by [ENTER]: 1 +---------------------------------------------- + +INFO: Creating tables. Will take a few minutes ... +INFO: Progress : [########################################] 100% +INFO: Spark tables created successfully.. +Press any key to continue +``` + +#### Run Option: "(2) - Run a subset of TPC-DS queries" + +A comma-separated list of queries can be specified in this option. The result of each query in the supplied list is generated in `TPCDS_WORK_DIR`, with a default directory location of `work`. The format of the result file is `query<number>.res`. + +A summary file named `run_summary.txt` is also generated. 
It contains information about the query number, execution time and number of rows returned. + +*Note:* The query number is a two-digit number, so for query 1 the results will be in `query01.res`. + +*Note:* If you are debugging and running queries using this option, make sure to save `run_summary.txt` after each of your runs. + +``` +============================================== +TPC-DS On Spark Menu +---------------------------------------------- +SETUP + (1) Create spark tables +RUN + (2) Run a subset of TPC-DS queries + (3) Run All (99) TPC-DS Queries +CLEANUP + (4) Cleanup toolkit + (Q) Quit +---------------------------------------------- +Please enter your choice followed by [ENTER]: 2 +---------------------------------------------- + +Enter a comma separated list of queries to run (ex: 1, 2), followed by [ENTER]: +1,2 +INFO: Checking pre-reqs for running TPC-DS queries. May take a few seconds.. +INFO: Checking pre-reqs for running TPC-DS queries is successful. +INFO: Running TPCDS queries. Will take a few minutes depending upon the number of queries specified.. +INFO: Progress : [########################################] 100% +INFO: TPCDS queries ran successfully. Below are the result details +INFO: Individual result files: spark-tpc-ds-performance-test/work/query<number>.res +INFO: Summary file: spark-tpc-ds-performance-test/work/run_summary.txt +Press any key to continue +``` + +#### Run Option: "(3) - Run all (99) TPC-DS queries" + +The only difference between this and option `(2)` is that all 99 TPC-DS queries are run instead of a subset. + +*Note:* If you are running this on your laptop, it can take a few hours to run all 99 TPC-DS queries. + +``` +============================================== +TPC-DS On Spark Menu +---------------------------------------------- +SETUP + (1) Create spark tables +RUN + (2) Run a subset of TPC-DS queries + (3) Run All (99) TPC-DS Queries +CLEANUP + (4) Cleanup toolkit + (Q) Quit +---------------------------------------------- +Please enter your choice followed by [ENTER]: 3 +---------------------------------------------- +INFO: Checking pre-reqs for running TPC-DS queries. May take a few seconds.. +INFO: Checking pre-reqs for running TPC-DS queries is successful. +INFO: Running TPCDS queries. Will take a few minutes depending upon the number of queries specified.. +INFO: Progress : [########################################] 100% +INFO: TPCDS queries ran successfully. Below are the result details +INFO: Individual result files: spark-tpc-ds-performance-test/work/query<number>.res +INFO: Summary file: spark-tpc-ds-performance-test/work/run_summary.txt +Press any key to continue +``` + +#### Cleanup option: "(4) - Cleanup" + +This will clean up all of the files generated during option steps 1, 2, and 3. If you use this option, make sure to run the setup step (1) before running queries using options 2 and 3. + +#### Cleanup option: "(Q) - Quit" + +This will exit the script. + +## Run using a Jupyter notebook in Watson Studio + +1. [Sign up for Watson Studio](#1-sign-up-for-watson-studio) +2. [Create the notebook](#2-create-the-notebook) +3. [Run the notebook](#3-run-the-notebook) +4. [Save and Share](#4-save-and-share) + +### 1. Sign up for Watson Studio + +Sign up for IBM's [Watson Studio](https://dataplatform.cloud.ibm.com/). By creating a project in Watson Studio, a free tier ``Object Storage`` service will be created in your IBM Cloud account. 
+ +> Note: When creating your Object Storage service, select the ``Free`` storage type in order to avoid having to pay an upgrade fee. + +Take note of your service names as you will need to select them in the following steps. + +### 2. Create the notebook + +* In [Watson Studio](https://dataplatform.cloud.ibm.com/), click on `Create notebook` to create a notebook. +* Create a project if necessary, provisioning an object storage service if required. +* In the `Assets` tab, select the `Create notebook` option. +* Select the `From URL` tab. +* Enter a name for the notebook. +* Optionally, enter a description for the notebook. +* Enter this Notebook URL: https://github.com/IBM/spark-tpc-ds-performance-test/blob/master/notebooks/run-tpcds-on-spark.ipynb +* Select the free Anaconda runtime. +* Click the `Create` button. + +![](doc/source/images/create-notebook.png) + +### 3. Run the notebook + +When a notebook is executed, what is actually happening is that each code cell in +the notebook is executed, in order, from top to bottom. + +Each code cell is selectable and is preceded by a tag in the left margin. The tag +format is `In [x]:`. Depending on the state of the notebook, the `x` can be: + +* A blank, this indicates that the cell has never been executed. +* A number, this number represents the relative order this code step was executed. +* A `*`, this indicates that the cell is currently executing. + +There are several ways to execute the code cells in your notebook: + +* One cell at a time. + * Select the cell, and then press the `Play` button in the toolbar. +* Batch mode, in sequential order. + * From the `Cell` menu bar, there are several options available. For example, you + can `Run All` cells in your notebook, or you can `Run All Below`, that will + start executing from the first cell under the currently selected cell, and then + continue executing all cells that follow. +* At a scheduled time. + * Press the `Schedule` button located in the top right section of your notebook + panel. Here you can schedule your notebook to be executed once at some future + time, or repeatedly at your specified interval. + +### 4. Save and Share + +#### How to save your work: + +Under the `File` menu, there are several ways to save your notebook: + +* `Save` will simply save the current state of your notebook, without any version + information. +* `Save Version` will save your current state of your notebook with a version tag + that contains a date and time stamp. Up to 10 versions of your notebook can be + saved, each one retrievable by selecting the `Revert To Version` menu item. + +#### How to share your work: + +You can share your notebook by selecting the “Share” button located in the top +right section of your notebook panel. The end result of this action will be a URL +link that will display a “read-only” version of your notebook. You have several +options to specify exactly what you want shared from your notebook: + +* `Only text and output`: will remove all code cells from the notebook view. +* `All content excluding sensitive code cells`: will remove any code cells + that contain a *sensitive* tag. For example, `# @hidden_cell` is used to protect + your dashDB credentials from being shared. +* `All content, including code`: displays the notebook as is. +* A variety of `download as` options are also available in the menu. + +## Considerations while increasing the scale factor. 
+This Code Pattern walks us through the steps that need to be performed to run the TPC-DS +benchmark with the qualification scale factor (1GB). Since this is a performance benchmark, typically +we need to run the benchmark with varying scale factors to gauge the throughput of the underlying data +processing engine. In the section below, we will briefly touch on things to be considered while increasing +the data size and running the workload against a production cluster. + +* Generation of the data at a larger scale factor: + In order to increase the scale, please follow the section titled "Scaling and Database Population" in + the [benchmark spec](http://www.tpc.org/tpc_documents_current_versions/pdf/tpc-ds_v2.2.0.pdf). +* Movement of data to the distributed file system: + After generating the data, we need to copy or move it to the underlying distributed file system (typically HDFS) + that your Spark cluster is configured to work with. +* Creation of Spark tables: + Modify the create table DDL script to change the path to the location of the data after the above copy step. + Additionally, we may consider partitioning the fact tables for better performance. +* We need to tune several Spark configs to get optimal performance. Some of them are discussed in the following + links. + * [Troubleshooting and Tuning Spark for heavy workloads](https://developer.ibm.com/hadoop/2016/07/18/troubleshooting-and-tuning-spark-for-heavy-workloads) + * [Scaling up to 100TB](https://developer.ibm.com/hadoop/2017/02/07/experiences-comparing-big-sql-and-spark-sql-at-100tb) + +# Learn more + +* **Data Analytics Code Patterns**: Enjoyed this Code Pattern? Check out our other [Data Analytics Code Patterns](https://developer.ibm.com/technologies/data-science/) +* **AI and Data Code Pattern Playlist**: Bookmark our [playlist](https://www.youtube.com/playlist?list=PLzUbsvIyrNfknNewObx5N7uGZ5FKH0Fde) with all of our Code Pattern videos +* **Watson Studio**: Master the art of data science with IBM's [Watson Studio](https://dataplatform.cloud.ibm.com/) +* **Spark on IBM Cloud**: Need a Spark cluster? Create up to 30 Spark executors on IBM Cloud with our [Spark service](https://cloud.ibm.com/catalog/services/apache-spark) + +# License +This code pattern is licensed under the Apache Software License, Version 2. Separate third party code objects invoked within this code pattern are licensed by their respective providers pursuant to their own separate licenses. Contributions are subject to the [Developer Certificate of Origin, Version 1.1 (DCO)](https://developercertificate.org/) and the [Apache Software License, Version 2](http://www.apache.org/licenses/LICENSE-2.0.txt). 
+ +[Apache Software License (ASL) FAQ](http://www.apache.org/foundation/license-faq.html#WhatDoesItMEAN) diff --git a/tests/tpcdsbench/doc/source/images/architecture.png b/tests/tpcdsbench/doc/source/images/architecture.png new file mode 100644 index 000000000..e84107c9b Binary files /dev/null and b/tests/tpcdsbench/doc/source/images/architecture.png differ diff --git a/tests/tpcdsbench/doc/source/images/create-notebook.png b/tests/tpcdsbench/doc/source/images/create-notebook.png new file mode 100644 index 000000000..90928b990 Binary files /dev/null and b/tests/tpcdsbench/doc/source/images/create-notebook.png differ diff --git a/tests/tpcdsbench/doc/source/images/create-project.png b/tests/tpcdsbench/doc/source/images/create-project.png new file mode 100644 index 000000000..98c212797 Binary files /dev/null and b/tests/tpcdsbench/doc/source/images/create-project.png differ diff --git a/tests/tpcdsbench/notebooks/run-tpcds-on-spark.ipynb b/tests/tpcdsbench/notebooks/run-tpcds-on-spark.ipynb new file mode 100644 index 000000000..c20017af5 --- /dev/null +++ b/tests/tpcdsbench/notebooks/run-tpcds-on-spark.ipynb @@ -0,0 +1,1220 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Explore Spark SQL and its performance using TPC-DS workload\n", + "This notebook sets up the Spark environment to run the TPC-DS benchmark at a 1GB scale factor. TPC-DS is a widely used industry standard decision support benchmark that is used to evaluate the performance of data processing engines. Given that TPC-DS exercises some key data warehouse features, running TPC-DS successfully reflects the readiness of Spark in terms of addressing the needs of a data warehouse application. Apache Spark v2.0 supports all 99 decision support queries that are part of this benchmark. \n", + "\n", + "This notebook is written in Scala and is intended to help Spark developers understand the setup steps required to run the benchmark.\n", + "Please note: several additional tuning steps may be required when adapting this to an actual Spark production cluster." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Get the journey data\n", + "- Clone the TPC-DS journey repository to get access to all the data and scripts that are required to exercise this journey. \n", + "- Normally the data and queries are generated by running the data and query generation utility from the TPC-DS toolkit available at http://www.tpc.org/tpcds. However, for ease of use, the data and queries are pre-generated for a 1GB scale factor. \n", + "- We use the pre-generated data and queries to demonstrate how they can be used to run the TPC-DS queries against Spark." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using cached version of spark-kernel-brunel-all-2.4.jar\n", + "git version 1.8.3.1\n", + "Cloning into 'spark-tpc-ds-performance-test'...\n", + "remote: Counting objects: 886, done. \n", + "remote: Compressing objects: 100% (40/40), done. 
\n", + "remote: Total 886 (delta 56), reused 58 (delta 42), pack-reused 802 ts: 48% (426/886), 351.89 MiB | 17.94 MiB/s ng objects: 53% (470/886), 351.89 MiB | 17.94 MiB/s \n", + "Receiving objects: 100% (886/886), 363.15 MiB | 18.41 MiB/s, done.\n", + "Resolving deltas: 100% (338/338), done.\n", + "Checking out files: 100% (810/810), done.\n" + ] + } + ], + "source": [ + "import sys.process._\n", + "%AddJar -magic https://brunelvis.org/jar/spark-kernel-brunel-all-2.4.jar\n", + "\"rm -rf spark-tpc-ds-performance-test\" !\n", + "\"git --version\" !\n", + "\"git clone --progress https://github.com/IBM/spark-tpc-ds-performance-test.git\" !" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup variables.\n", + "* Sets up variables that are used in the rest of this notebook.\n", + "* The path variables are relative to the git clone directory.\n", + "* tpcdsDatabaseName is hard-coded to \"TPCDS1G\". This can be changed if a different database name is desired." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TPCDS root directory is at : spark-tpc-ds-performance-test\n", + "TPCDS ddl scripts directory is at: spark-tpc-ds-performance-test/src/ddl/individual\n", + "TPCDS data directory is at: spark-tpc-ds-performance-test/src/data\n", + "TPCDS queries directory is at: spark-tpc-ds-performance-test/src/queries\n" + ] + } + ], + "source": [ + "val tpcdsRootDir = \"spark-tpc-ds-performance-test\"\n", + "val tpcdsWorkDir = \"spark-tpc-ds-performance-test/work\"\n", + "val tpcdsDdlDir = s\"${tpcdsRootDir}/src/ddl/individual\"\n", + "val tpcdsGenDataDir = s\"${tpcdsRootDir}/src/data\"\n", + "val tpcdsQueriesDir = s\"${tpcdsRootDir}/src/queries\"\n", + "val tpcdsDatabaseName = \"TPCDS1G\"\n", + "var totalTime: Long = 0\n", + "println(\"TPCDS root directory is at : \"+ tpcdsRootDir)\n", + "println(\"TPCDS ddl scripts directory is at: \" + tpcdsDdlDir)\n", + "println(\"TPCDS data directory is at: \"+ tpcdsGenDataDir)\n", + "println(\"TPCDS queries directory is at: \"+ tpcdsQueriesDir)\n", + "val spark = SparkSession.\n", + " builder().\n", + " config(\"spark.ui.showConsoleProgress\", false).\n", + " config(\"spark.sql.autoBroadcastJoinThreshold\", -1).\n", + " config(\"spark.sql.crossJoin.enabled\", true).\n", + " getOrCreate()\n", + "\n", + "spark.sparkContext.setLogLevel(\"ERROR\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Utility function definitions.\n", + "* Defines the utility functions that are called from the cells below in the notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def clearTableDirectory(tableName: String): Unit = {\n", + " import sys.process._\n", + " val commandStr1 = s\"rm -rf spark-warehouse/tpcds2g.db/${tableName}/*\"\n", + " val commandStr2 = s\"rm -rf spark-warehouse/tpcds2g.db/${tableName}\"\n", + " var exitCode = Process(commandStr1).!\n", + " exitCode = Process(commandStr2).!\n", + "}\n", + "\n", + "def createDatabase(): Unit = {\n", + " spark.sql(s\"DROP DATABASE IF EXISTS ${tpcdsDatabaseName} CASCADE\")\n", + " spark.sql(s\"CREATE DATABASE ${tpcdsDatabaseName}\")\n", + " spark.sql(s\"USE ${tpcdsDatabaseName}\")\n", + "}\n", + "\n", + "/**\n", + " * Function to create a table in spark. 
It reads the DDL script for each of the\n", + " * tpc-ds table and executes it on Spark.\n", + " */\n", + "def createTable(tableName: String): Unit = {\n", + " println(s\"Creating table $tableName ..\")\n", + " spark.sql(s\"DROP TABLE IF EXISTS $tableName\")\n", + " clearTableDirectory(tableName) \n", + " val (fileName, content) = \n", + " spark.sparkContext.wholeTextFiles(s\"${tpcdsDdlDir}/$tableName.sql\").collect()(0) \n", + " \n", + " // Remove the replace for the .dat once it is fixed in the github repo\n", + " val sqlStmts = content.stripLineEnd\n", + " .replace('\\n', ' ')\n", + " .replace(\"${TPCDS_GENDATA_DIR}\", tpcdsGenDataDir)\n", + " .replace(\"csv\", \"org.apache.spark.sql.execution.datasources.csv.CSVFileFormat\").split(\";\")\n", + " sqlStmts.map(stmt => spark.sql(stmt)) \n", + "} \n", + "\n", + "import scala.collection.mutable.ArrayBuffer\n", + "import org.apache.spark.sql.DataFrame\n", + "\n", + "def runQuery(queryStr: String,\n", + " individual: Boolean = true,\n", + " resultDir: String): Seq[(String, Double, Int, String)] = {\n", + " val querySummary = ArrayBuffer.empty[(String, Double, Int, String)] \n", + " val queryName = s\"${tpcdsQueriesDir}/query${queryStr}.sql\" \n", + " val (_, content) = spark.sparkContext.wholeTextFiles(queryName).collect()(0) \n", + " val queries = content.split(\"\\n\")\n", + " .filterNot (_.startsWith(\"--\"))\n", + " .mkString(\" \").split(\";\")\n", + " \n", + " var cnt = 1 \n", + " for (query <- queries) {\n", + " val start = System.nanoTime()\n", + " val df = spark.sql(query) \n", + " val result = spark.sql(query).collect \n", + " val timeElapsed = (System.nanoTime() - start) / 1000000000\n", + " val name = if (queries.length > 1) {\n", + " s\"query${queryStr}-${cnt}\"\n", + " } else {\n", + " s\"query${queryStr}\"\n", + " } \n", + " val resultFile = s\"${resultDir}/${name}-notebook.res\" \n", + " df.coalesce(1)\n", + " .write.format(\"com.databricks.spark.csv\")\n", + " .option(\"header\", \"true\")\n", + " .mode(\"overwrite\")\n", + " .save(resultFile)\n", + " totalTime = totalTime + timeElapsed\n", + " \n", + " querySummary += Tuple4.apply(name, timeElapsed, result.length, resultFile)\n", + " cnt += 1 \n", + " }\n", + " querySummary \n", + "}\n", + "\n", + "// run function for each table in tables array\n", + "def forEachTable(tables: Array[String], f: (String) => Unit): Unit = {\n", + " for ( table <- tables) {\n", + " try {\n", + " f(table)\n", + " } catch {\n", + " case e: Throwable => {\n", + " println(\"EXCEPTION!! 
\" + e.getMessage())\n", + " throw e\n", + " }\n", + " }\n", + " }\n", + "}\n", + "\n", + "def runIndividualQuery(queryNum: Int, resultDir: String = tpcdsWorkDir ): DataFrame = {\n", + " val queryStr = \"%02d\".format(queryNum) \n", + " val testSummary = ArrayBuffer.empty[(String, Double, Int, String)] \n", + " try { \n", + " println(s\"Running TPC-DS Query : $queryStr\") \n", + " testSummary ++= runQuery(queryStr, true, resultDir)\n", + " } catch {\n", + " case e: Throwable => {\n", + " println(\"Error in query \"+ queryNum + \" msg = \" + e.getMessage)\n", + " }\n", + " }\n", + " testSummary.toDF(\"QueryName\",\"ElapsedTime\",\"RowsReturned\", \"ResultLocation\")\n", + "}\n", + "\n", + "def runAllQueries(resultDir: String = tpcdsWorkDir): DataFrame = {\n", + " val testSummary = ArrayBuffer.empty[(String, Double, Int, String)] \n", + " var queryErrors = 0\n", + " for (i <- 1 to 99) {\n", + " try{\n", + " val queryStr = \"%02d\".format(i)\n", + " println(s\"Running TPC-DS Query : $queryStr\") \n", + " testSummary ++= runQuery(queryStr, false, resultDir)\n", + " } catch {\n", + " case e: Throwable => {\n", + " println(\"Error in query \"+ i + \" msg = \" + e.getMessage)\n", + " queryErrors += 1\n", + " }\n", + " }\n", + " }\n", + "\n", + " println(\"=====================================================\")\n", + " if ( queryErrors > 0) {\n", + " println(s\"Query execution failed with $queryErrors errors\")\n", + " } else {\n", + " println(\"All TPC-DS queries ran successfully\")\n", + " }\n", + " println (s\"Total Elapsed Time so far: ${totalTime} seconds.\")\n", + " println(\"=====================================================\")\n", + " testSummary.toDF(\"QueryName\",\"ElapsedTime\",\"RowsReturned\", \"ResultLocation\")\n", + "}\n", + "\n", + "def displaySummary(summaryDF: DataFrame): Unit = {\n", + " summaryDF.select(\"QueryName\", \"ElapsedTime\", \"RowsReturned\").show(10000)\n", + "}\n", + "\n", + "def displayResult(queryNum: Int, summaryDF: DataFrame) = {\n", + " val queryStr = \"%02d\".format(queryNum)\n", + " // Find result files for this query number. For some queries there are\n", + " // multiple result files. \n", + " val files = summaryDF.where(s\"queryName like 'query${queryStr}%'\").select(\"ResultLocation\").collect()\n", + " for (file <- files) {\n", + " val fileName = file.getString(0)\n", + " val df = spark.read\n", + " .format(\"csv\")\n", + " .option(\"header\", \"true\") //reading the headers\n", + " .option(\"mode\", \"DROPMALFORMED\")\n", + " .load(fileName)\n", + " val numRows:Int = df.count().toInt\n", + " df.show(numRows, truncate=false)\n", + " }\n", + "}\n", + "\n", + "def explainQuery(queryNum: Int) = {\n", + " val queryStr = \"%02d\".format(queryNum) \n", + " val queryName = s\"${tpcdsQueriesDir}/query${queryStr}.sql\" \n", + " val (_, content) = spark.sparkContext.wholeTextFiles(queryName).collect()(0) \n", + " val queries = content.split(\"\\n\")\n", + " .filterNot (_.startsWith(\"--\"))\n", + " .mkString(\" \").split(\";\")\n", + " \n", + " for (query <- queries) { \n", + " spark.sql(query).explain(true) \n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup the TPC-DS schema\n", + "* Create the database as specified by tpcdsDatabaseName\n", + "* Create all the TPC-DS tables\n", + "* Load the data into the tables in parquet format. 
Since the data generated by the TPC-DS toolkit is in CSV format, we do the loading in multiple steps.\n", + " * Step 1: we create tables in CSV format by pointing the location to the generated data\n", + " * Step 2: we create Parquet tables by using CTAS to convert the text data into Parquet format\n", + " * Step 3: we drop the text-based tables as we no longer need them" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Creating table call_center ..\n", + "Creating table catalog_sales ..\n", + "Creating table customer_demographics ..\n", + "Creating table income_band ..\n", + "Creating table promotion ..\n", + "Creating table store ..\n", + "Creating table time_dim ..\n", + "Creating table web_returns ..\n", + "Creating table catalog_page ..\n", + "Creating table customer ..\n", + "Creating table date_dim ..\n", + "Creating table inventory ..\n", + "Creating table reason ..\n", + "Creating table store_returns ..\n", + "Creating table warehouse ..\n", + "Creating table web_sales ..\n", + "Creating table catalog_returns ..\n", + "Creating table customer_address ..\n", + "Creating table household_demographics ..\n", + "Creating table item ..\n", + "Creating table ship_mode ..\n", + "Creating table store_sales ..\n", + "Creating table web_page ..\n", + "Creating table web_site ..\n" + ] + } + ], + "source": [ + "// TPC-DS table names.\n", + "val tables = Array(\"call_center\", \"catalog_sales\",\n", + " \"customer_demographics\", \"income_band\",\n", + " \"promotion\", \"store\", \"time_dim\", \"web_returns\",\n", + " \"catalog_page\", \"customer\", \"date_dim\",\n", + " \"inventory\", \"reason\", \"store_returns\", \"warehouse\",\n", + " \"web_sales\", \"catalog_returns\", \"customer_address\",\n", + " \"household_demographics\", \"item\", \"ship_mode\", \"store_sales\",\n", + " \"web_page\", \"web_site\" )\n", + "\n", + "// Create the database\n", + "createDatabase\n", + "\n", + "// Create the tables\n", + "forEachTable(tables, table => createTable(table))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Verify table creation and data loading.\n", + "* Run a simple Spark SQL query to get the count of rows\n", + "* Verify that the row counts are as expected" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=====================================================\n", + "Loaded and verified the table counts successfully\n", + "=====================================================\n" + ] + } + ], + "source": [ + "// Run a count query and get the counts\n", + "val rowCounts = tables.map { table =>\n", + " spark.table(table).count()\n", + "}\n", + "\n", + "val expectedCounts = Array (\n", + " 6, 1441548, 1920800, 20, 300, 12, 86400,\n", + " 71763, 11718, 100000, 73049, 11745000, \n", + " 35, 287514, 5, 719384, 144067, 50000, 7200,\n", + " 18000, 20, 2880404, 60, 30\n", + ")\n", + "\n", + "var errorCount = 0;\n", + "val zippedCountsWithIndex = rowCounts.zip(expectedCounts).zipWithIndex\n", + "for ((pair, index) <- zippedCountsWithIndex) {\n", + " if (pair._1 != pair._2) {\n", + " println(s\"\"\"ERROR!! 
Row counts for ${tables(index)} does not match.\n", + " Expected=${expectedCounts(index)} but found ${rowCounts(index)}\"\"\")\n", + " errorCount += 1\n", + " }\n", + "}\n", + "\n", + "println(\"=====================================================\")\n", + "if ( errorCount > 0) {\n", + " println(s\"Load verification failed with $errorCount errors\")\n", + "} else {\n", + " println(\"Loaded and verified the table counts successfully\")\n", + "}\n", + "println(\"=====================================================\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "## Run a single query\n", + "* Run a query given a query number between 1 to 99\n", + "* Display the query results, the elapsed time to execute the query and the number of rows returned for the query\n", + "* To run a different query, please change the QUERY_NUM to a valid value from 1 to 99" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running TPC-DS Query : 01\n", + "+---------+-----------+------------+\n", + "|QueryName|ElapsedTime|RowsReturned|\n", + "+---------+-----------+------------+\n", + "| query01| 12.0| 100|\n", + "+---------+-----------+------------+\n", + "\n", + "+----------------+\n", + "|c_customer_id |\n", + "+----------------+\n", + "|AAAAAAAAAAABBAAA|\n", + "|AAAAAAAAAAADBAAA|\n", + "|AAAAAAAAAAADBAAA|\n", + "|AAAAAAAAAAAKAAAA|\n", + "|AAAAAAAAAABDAAAA|\n", + "|AAAAAAAAAABHBAAA|\n", + "|AAAAAAAAAABLAAAA|\n", + "|AAAAAAAAAABMAAAA|\n", + "|AAAAAAAAAACHAAAA|\n", + "|AAAAAAAAAACMAAAA|\n", + "|AAAAAAAAAADDAAAA|\n", + "|AAAAAAAAAADGAAAA|\n", + "|AAAAAAAAAADGBAAA|\n", + "|AAAAAAAAAADGBAAA|\n", + "|AAAAAAAAAADPAAAA|\n", + "|AAAAAAAAAAEBAAAA|\n", + "|AAAAAAAAAAEFBAAA|\n", + "|AAAAAAAAAAEGBAAA|\n", + "|AAAAAAAAAAEIAAAA|\n", + "|AAAAAAAAAAEMAAAA|\n", + "|AAAAAAAAAAFAAAAA|\n", + "|AAAAAAAAAAFPAAAA|\n", + "|AAAAAAAAAAGGBAAA|\n", + "|AAAAAAAAAAGHBAAA|\n", + "|AAAAAAAAAAGJAAAA|\n", + "|AAAAAAAAAAGMAAAA|\n", + "|AAAAAAAAAAHEBAAA|\n", + "|AAAAAAAAAAHFBAAA|\n", + "|AAAAAAAAAAIEBAAA|\n", + "|AAAAAAAAAAJGBAAA|\n", + "|AAAAAAAAAAJHBAAA|\n", + "|AAAAAAAAAAKCAAAA|\n", + "|AAAAAAAAAAKCAAAA|\n", + "|AAAAAAAAAAKJAAAA|\n", + "|AAAAAAAAAAKMAAAA|\n", + "|AAAAAAAAAAKMAAAA|\n", + "|AAAAAAAAAALAAAAA|\n", + "|AAAAAAAAAALABAAA|\n", + "|AAAAAAAAAALGAAAA|\n", + "|AAAAAAAAAALHBAAA|\n", + "|AAAAAAAAAALJAAAA|\n", + "|AAAAAAAAAANHAAAA|\n", + "|AAAAAAAAAANHBAAA|\n", + "|AAAAAAAAAANJAAAA|\n", + "|AAAAAAAAAANMAAAA|\n", + "|AAAAAAAAAANMAAAA|\n", + "|AAAAAAAAAANNAAAA|\n", + "|AAAAAAAAAAOBBAAA|\n", + "|AAAAAAAAAAODBAAA|\n", + "|AAAAAAAAAAOLAAAA|\n", + "|AAAAAAAAAAPGBAAA|\n", + "|AAAAAAAAABAAAAAA|\n", + "|AAAAAAAAABAEAAAA|\n", + "|AAAAAAAAABAEBAAA|\n", + "|AAAAAAAAABAFBAAA|\n", + "|AAAAAAAAABAIAAAA|\n", + "|AAAAAAAAABAOAAAA|\n", + "|AAAAAAAAABBDBAAA|\n", + "|AAAAAAAAABCFAAAA|\n", + "|AAAAAAAAABCHBAAA|\n", + "|AAAAAAAAABDHAAAA|\n", + "|AAAAAAAAABENAAAA|\n", + "|AAAAAAAAABFEBAAA|\n", + "|AAAAAAAAABFGAAAA|\n", + "|AAAAAAAAABFMAAAA|\n", + "|AAAAAAAAABFPAAAA|\n", + "|AAAAAAAAABGFAAAA|\n", + "|AAAAAAAAABGFBAAA|\n", + "|AAAAAAAAABGJAAAA|\n", + "|AAAAAAAAABIBBAAA|\n", + "|AAAAAAAAABICBAAA|\n", + "|AAAAAAAAABIIAAAA|\n", + "|AAAAAAAAABJNAAAA|\n", + "|AAAAAAAAABKGBAAA|\n", + "|AAAAAAAAABLOAAAA|\n", + "|AAAAAAAAABLPAAAA|\n", + "|AAAAAAAAABMABAAA|\n", + "|AAAAAAAAABMPAAAA|\n", + "|AAAAAAAAABNAAAAA|\n", + "|AAAAAAAAABNCBAAA|\n", + "|AAAAAAAAABNEBAAA|\n", + "|AAAAAAAAABNLAAAA|\n", + "|AAAAAAAAABNOAAAA|\n", + 
"|AAAAAAAAABNPAAAA|\n", + "|AAAAAAAAABOAAAAA|\n", + "|AAAAAAAAABOFBAAA|\n", + "|AAAAAAAAABOOAAAA|\n", + "|AAAAAAAAABOPAAAA|\n", + "|AAAAAAAAABPEAAAA|\n", + "|AAAAAAAAACADAAAA|\n", + "|AAAAAAAAACAFAAAA|\n", + "|AAAAAAAAACAFAAAA|\n", + "|AAAAAAAAACAHBAAA|\n", + "|AAAAAAAAACAJAAAA|\n", + "|AAAAAAAAACBDAAAA|\n", + "|AAAAAAAAACBDAAAA|\n", + "|AAAAAAAAACBEBAAA|\n", + "|AAAAAAAAACBNAAAA|\n", + "|AAAAAAAAACBPAAAA|\n", + "|AAAAAAAAACCHAAAA|\n", + "+----------------+\n", + "\n" + ] + } + ], + "source": [ + "val QUERY_NUM = 1\n", + "val result = runIndividualQuery(QUERY_NUM)\n", + "displaySummary(result)\n", + "displayResult(QUERY_NUM, result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run all the TPC-DS queries\n", + "* Runs all the queries starting from 1 to 99\n", + "* The query results are saved and can be queried by calling getResults method.\n", + "* The summary will be shown at the end." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running TPC-DS Query : 01\n", + "Running TPC-DS Query : 02\n", + "Running TPC-DS Query : 03\n", + "Running TPC-DS Query : 04\n", + "Running TPC-DS Query : 05\n", + "Running TPC-DS Query : 06\n", + "Running TPC-DS Query : 07\n", + "Running TPC-DS Query : 08\n", + "Running TPC-DS Query : 09\n", + "Running TPC-DS Query : 10\n", + "Running TPC-DS Query : 11\n", + "Running TPC-DS Query : 12\n", + "Running TPC-DS Query : 13\n", + "Running TPC-DS Query : 14\n", + "Running TPC-DS Query : 15\n", + "Running TPC-DS Query : 16\n", + "Running TPC-DS Query : 17\n", + "Running TPC-DS Query : 18\n", + "Running TPC-DS Query : 19\n", + "Running TPC-DS Query : 20\n", + "Running TPC-DS Query : 21\n", + "Running TPC-DS Query : 22\n", + "Running TPC-DS Query : 23\n", + "Running TPC-DS Query : 24\n", + "Running TPC-DS Query : 25\n", + "Running TPC-DS Query : 26\n", + "Running TPC-DS Query : 27\n", + "Running TPC-DS Query : 28\n", + "Running TPC-DS Query : 29\n", + "Running TPC-DS Query : 30\n", + "Running TPC-DS Query : 31\n", + "Running TPC-DS Query : 32\n", + "Running TPC-DS Query : 33\n", + "Running TPC-DS Query : 34\n", + "Running TPC-DS Query : 35\n", + "Running TPC-DS Query : 36\n", + "Running TPC-DS Query : 37\n", + "Running TPC-DS Query : 38\n", + "Running TPC-DS Query : 39\n", + "Running TPC-DS Query : 40\n", + "Running TPC-DS Query : 41\n", + "Running TPC-DS Query : 42\n", + "Running TPC-DS Query : 43\n", + "Running TPC-DS Query : 44\n", + "Running TPC-DS Query : 45\n", + "Running TPC-DS Query : 46\n", + "Running TPC-DS Query : 47\n", + "Running TPC-DS Query : 48\n", + "Running TPC-DS Query : 49\n", + "Running TPC-DS Query : 50\n", + "Running TPC-DS Query : 51\n", + "Running TPC-DS Query : 52\n", + "Running TPC-DS Query : 53\n", + "Running TPC-DS Query : 54\n", + "Running TPC-DS Query : 55\n", + "Running TPC-DS Query : 56\n", + "Running TPC-DS Query : 57\n", + "Running TPC-DS Query : 58\n", + "Running TPC-DS Query : 59\n", + "Running TPC-DS Query : 60\n", + "Running TPC-DS Query : 61\n", + "Running TPC-DS Query : 62\n", + "Running TPC-DS Query : 63\n", + "Running TPC-DS Query : 64\n", + "Running TPC-DS Query : 65\n", + "Running TPC-DS Query : 66\n", + "Running TPC-DS Query : 67\n", + "Running TPC-DS Query : 68\n", + "Running TPC-DS Query : 69\n", + "Running TPC-DS Query : 70\n", + "Running TPC-DS Query : 71\n", + "Running TPC-DS Query : 72\n", + "Running TPC-DS Query : 73\n", + "Running TPC-DS Query : 74\n", + "Running TPC-DS Query 
: 75\n", + "Running TPC-DS Query : 76\n", + "Running TPC-DS Query : 77\n", + "Running TPC-DS Query : 78\n", + "Running TPC-DS Query : 79\n", + "Running TPC-DS Query : 80\n", + "Running TPC-DS Query : 81\n", + "Running TPC-DS Query : 82\n", + "Running TPC-DS Query : 83\n", + "Running TPC-DS Query : 84\n", + "Running TPC-DS Query : 85\n", + "Running TPC-DS Query : 86\n", + "Running TPC-DS Query : 87\n", + "Running TPC-DS Query : 88\n", + "Running TPC-DS Query : 89\n", + "Running TPC-DS Query : 90\n", + "Running TPC-DS Query : 91\n", + "Running TPC-DS Query : 92\n", + "Running TPC-DS Query : 93\n", + "Running TPC-DS Query : 94\n", + "Running TPC-DS Query : 95\n", + "Running TPC-DS Query : 96\n", + "Running TPC-DS Query : 97\n", + "Running TPC-DS Query : 98\n", + "Running TPC-DS Query : 99\n", + "=====================================================\n", + "All TPC-DS queries ran successfully\n", + "Total Elapsed Time so far: 1276 seconds.\n", + "=====================================================\n", + "+---------+-----------+------------+\n", + "|QueryName|ElapsedTime|RowsReturned|\n", + "+---------+-----------+------------+\n", + "| query01| 7.0| 100|\n", + "| query02| 6.0| 2513|\n", + "| query03| 6.0| 89|\n", + "| query04| 41.0| 8|\n", + "| query05| 9.0| 100|\n", + "| query06| 18.0| 45|\n", + "| query07| 6.0| 100|\n", + "| query08| 8.0| 5|\n", + "| query09| 1.0| 1|\n", + "| query10| 10.0| 5|\n", + "| query11| 25.0| 88|\n", + "| query12| 10.0| 100|\n", + "| query13| 8.0| 1|\n", + "|query14-1| 47.0| 100|\n", + "|query14-2| 52.0| 100|\n", + "| query15| 8.0| 100|\n", + "| query16| 15.0| 1|\n", + "| query17| 8.0| 1|\n", + "| query18| 7.0| 100|\n", + "| query19| 4.0| 100|\n", + "| query20| 5.0| 100|\n", + "| query21| 9.0| 100|\n", + "| query22| 8.0| 100|\n", + "|query23-1| 27.0| 1|\n", + "|query23-2| 29.0| 4|\n", + "|query24-1| 11.0| 0|\n", + "|query24-2| 9.0| 0|\n", + "| query25| 6.0| 1|\n", + "| query26| 3.0| 100|\n", + "| query27| 4.0| 100|\n", + "| query28| 4.0| 1|\n", + "| query29| 8.0| 1|\n", + "| query30| 8.0| 100|\n", + "| query31| 13.0| 51|\n", + "| query32| 4.0| 1|\n", + "| query33| 8.0| 100|\n", + "| query34| 5.0| 451|\n", + "| query35| 10.0| 100|\n", + "| query36| 7.0| 100|\n", + "| query37| 2.0| 1|\n", + "| query38| 15.0| 1|\n", + "|query39-1| 13.0| 246|\n", + "|query39-2| 18.0| 17|\n", + "| query40| 7.0| 100|\n", + "| query41| 1.0| 4|\n", + "| query42| 2.0| 10|\n", + "| query43| 2.0| 6|\n", + "| query44| 3.0| 10|\n", + "| query45| 12.0| 19|\n", + "| query46| 6.0| 100|\n", + "| query47| 38.0| 100|\n", + "| query48| 6.0| 1|\n", + "| query49| 4.0| 32|\n", + "| query50| 8.0| 6|\n", + "| query51| 37.0| 100|\n", + "| query52| 3.0| 100|\n", + "| query53| 9.0| 100|\n", + "| query54| 6.0| 1|\n", + "| query55| 2.0| 100|\n", + "| query56| 5.0| 100|\n", + "| query57| 26.0| 100|\n", + "| query58| 14.0| 3|\n", + "| query59| 4.0| 100|\n", + "| query60| 11.0| 100|\n", + "| query61| 16.0| 1|\n", + "| query62| 7.0| 100|\n", + "| query63| 9.0| 100|\n", + "| query64| 70.0| 10|\n", + "| query65| 8.0| 100|\n", + "| query66| 7.0| 5|\n", + "| query67| 14.0| 100|\n", + "| query68| 5.0| 100|\n", + "| query69| 6.0| 100|\n", + "| query70| 4.0| 3|\n", + "| query71| 6.0| 1018|\n", + "| query72| 51.0| 100|\n", + "| query73| 3.0| 5|\n", + "| query74| 31.0| 92|\n", + "| query75| 60.0| 100|\n", + "| query76| 6.0| 100|\n", + "| query77| 42.0| 44|\n", + "| query78| 27.0| 100|\n", + "| query79| 5.0| 100|\n", + "| query80| 24.0| 100|\n", + "| query81| 12.0| 100|\n", + "| query82| 2.0| 2|\n", + "| query83| 12.0| 
21|\n", + "|  query84|        2.0|          25|\n", + "|  query85|        5.0|           6|\n", + "|  query86|        4.0|         100|\n", + "|  query87|       12.0|           1|\n", + "|  query88|        9.0|           1|\n", + "|  query89|       12.0|         100|\n", + "|  query90|        3.0|           1|\n", + "|  query91|        5.0|           1|\n", + "|  query92|        3.0|           1|\n", + "|  query93|        3.0|         100|\n", + "|  query94|        9.0|           1|\n", + "|  query95|       12.0|           1|\n", + "|  query96|        4.0|           1|\n", + "|  query97|        9.0|           1|\n", + "|  query98|       12.0|        2516|\n", + "|  query99|        5.0|          90|\n", + "+---------+-----------+------------+\n", + "\n" + ] + } + ], + "source": [ + "val result = runAllQueries()\n", + "displaySummary(result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Display Result for an individual Query\n", + "* Reads the result file for the given query, stored when the queries were run in the previous steps.\n", + "* Certain queries have multiple associated result files. The result files are read in sequence and\n", + " results are displayed.\n", + "* If the result file(s) are not found, then an error is displayed.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "displayResult(1, result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Display SQL Execution Plan\n", + "* Displays the analyzed, optimized and physical plans for a given query.\n", + "* Can be used by developers for debugging purposes.\n", + "* QUERY_NUM can be changed to display the plan for a different query." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "val QUERY_NUM=1\n", + "explainQuery(QUERY_NUM)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + "
\n", + "\n", + "\n", + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%brunel\n", + "data('result') bar x(QueryName) y(ElapsedTime) title(\"Query Execution Time in seconds\", \"Execution Summary\":footer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Learn more\n", + "Visit [Apache Spark](https://spark.apache.org) to learn more about Spark. For questions or requests, please visit the [Spark Community](https://spark.apache.org/community.html). To get involved, see [Contributing to Apache Spark](https://spark.apache.org/contributing.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Authors\n", + "* Dilip Biswal is a Senior Software Engineer at the Spark Technology Center at IBM. He is an active Apache Spark contributor and works in the open source community.\n", + " He is experienced in Relational Databases, Distributed Computing and Big Data Analytics. He has worked extensively on SQL engines such as Informix, Derby, and Big SQL.\n", + "* Sunitha Kambhampati is an Advisory Software Engineer at the Spark Technology Center at IBM. She is an Apache Spark contributor and works in the open source community. She is experienced in Big Data Analytics.\n", + "* Xin Wu is an Advisory Software Engineer and an active contributor to Apache Spark. He has experience in distributed query processing engines such as Big SQL and DB2." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala 2.11 with Spark 2.1", + "language": "scala", + "name": "scala-spark21" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".scala", + "mimetype": "text/x-scala", + "name": "scala", + "pygments_lexer": "scala", + "version": "2.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/tests/tpcdsbench/src/ddl/create_database.sql b/tests/tpcdsbench/src/ddl/create_database.sql new file mode 100644 index 000000000..737c1a300 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/create_database.sql @@ -0,0 +1,2 @@ +CREATE DATABASE IF NOT EXISTS ${TPCDS_DBNAME} + COMMENT 'For TPCDS at 1GB scale factor'; diff --git a/tests/tpcdsbench/src/ddl/create_tables.sql b/tests/tpcdsbench/src/ddl/create_tables.sql new file mode 100644 index 000000000..af11da86e --- /dev/null +++ b/tests/tpcdsbench/src/ddl/create_tables.sql @@ -0,0 +1,774 @@ +------------------------------------------------------------------------------ +-- Licensed Materials - Property of IBM +-- +-- (C) COPYRIGHT International Business Machines Corp. 2014 +-- All Rights Reserved. +-- +-- US Government Users Restricted Rights - Use, duplication or +-- disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
+------------------------------------------------------------------------------ + +USE ${TPCDS_DBNAME}; + +drop table if exists call_center_text; +create table call_center_text +( + cc_call_center_sk int, + cc_call_center_id string, + cc_rec_start_date string, + cc_rec_end_date string, + cc_closed_date_sk int, + cc_open_date_sk int, + cc_name string, + cc_class string, + cc_employees int, + cc_sq_ft int, + cc_hours string, + cc_manager string, + cc_mkt_id int, + cc_mkt_class string, + cc_mkt_desc string, + cc_market_manager string, + cc_division int, + cc_division_name string, + cc_company int, + cc_company_name string, + cc_street_number string, + cc_street_name string, + cc_street_type string, + cc_suite_number string, + cc_city string, + cc_county string, + cc_state string, + cc_zip string, + cc_country string, + cc_gmt_offset double, + cc_tax_percentage double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/call_center.dat") +; +drop table if exists call_center; +create table call_center +using parquet +as (select * from call_center_text) +; +drop table if exists call_center_text; + +drop table if exists catalog_page_text; +create table catalog_page_text +( + cp_catalog_page_sk int, + cp_catalog_page_id string, + cp_start_date_sk int, + cp_end_date_sk int, + cp_department string, + cp_catalog_number int, + cp_catalog_page_number int, + cp_description string, + cp_type string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/catalog_page.dat") +; +drop table if exists catalog_page; +create table catalog_page +using parquet +as (select * from catalog_page_text) +; +drop table if exists catalog_page_text; + +drop table if exists catalog_returns_text; +create table catalog_returns_text +( + cr_returned_date_sk int, + cr_returned_time_sk int, + cr_item_sk int, + cr_refunded_customer_sk int, + cr_refunded_cdemo_sk int, + cr_refunded_hdemo_sk int, + cr_refunded_addr_sk int, + cr_returning_customer_sk int, + cr_returning_cdemo_sk int, + cr_returning_hdemo_sk int, + cr_returning_addr_sk int, + cr_call_center_sk int, + cr_catalog_page_sk int, + cr_ship_mode_sk int, + cr_warehouse_sk int, + cr_reason_sk int, + cr_order_number int, + cr_return_quantity int, + cr_return_amount double, + cr_return_tax double, + cr_return_amt_inc_tax double, + cr_fee double, + cr_return_ship_cost double, + cr_refunded_cash double, + cr_reversed_charge double, + cr_store_credit double, + cr_net_loss double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/catalog_returns.dat") +; +drop table if exists catalog_returns; +create table catalog_returns +using parquet +as (select * from catalog_returns_text) +; +drop table if exists catalog_returns_text; + + +drop table if exists catalog_sales_text; +create table catalog_sales_text +( + cs_sold_date_sk int, + cs_sold_time_sk int, + cs_ship_date_sk int, + cs_bill_customer_sk int, + cs_bill_cdemo_sk int, + cs_bill_hdemo_sk int, + cs_bill_addr_sk int, + cs_ship_customer_sk int, + cs_ship_cdemo_sk int, + cs_ship_hdemo_sk int, + cs_ship_addr_sk int, + cs_call_center_sk int, + cs_catalog_page_sk int, + cs_ship_mode_sk int, + cs_warehouse_sk int, + cs_item_sk int, + cs_promo_sk int, + cs_order_number int, + cs_quantity int, + cs_wholesale_cost double, + cs_list_price double, + cs_sales_price double, + cs_ext_discount_amt double, + cs_ext_sales_price double, + cs_ext_wholesale_cost double, + cs_ext_list_price double, + cs_ext_tax double, + cs_coupon_amt double, + cs_ext_ship_cost double, 
+ cs_net_paid double, + cs_net_paid_inc_tax double, + cs_net_paid_inc_ship double, + cs_net_paid_inc_ship_tax double, + cs_net_profit double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/catalog_sales.dat") +; +drop table if exists catalog_sales; +create table catalog_sales +using parquet +as (select * from catalog_sales_text) +; +drop table if exists catalog_sales_text; + +drop table if exists customer_text; +create table customer_text +( + c_customer_sk int, + c_customer_id string, + c_current_cdemo_sk int, + c_current_hdemo_sk int, + c_current_addr_sk int, + c_first_shipto_date_sk int, + c_first_sales_date_sk int, + c_salutation string, + c_first_name string, + c_last_name string, + c_preferred_cust_flag string, + c_birth_day int, + c_birth_month int, + c_birth_year int, + c_birth_country string, + c_login string, + c_email_address string, + c_last_review_date string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/customer.dat") +; +drop table if exists customer; +create table customer +using parquet +as (select * from customer_text) +; +drop table if exists customer_text; + + +drop table if exists customer_address_text; +create table customer_address_text +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset double, + ca_location_type string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/customer_address.dat") +; +drop table if exists customer_address; +create table customer_address +using parquet +as (select * from customer_address_text) +; +drop table if exists customer_address_text; + +drop table if exists customer_demographics_text; +create table customer_demographics_text +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/customer_demographics.dat") +; +drop table if exists customer_demographics; +create table customer_demographics +using parquet +as (select * from customer_demographics_text) +; +drop table if exists customer_demographics_text; + +drop table if exists date_dim_text; +create table date_dim_text +( + d_date_sk int, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/date_dim.dat") +; +drop table if exists date_dim; +create table date_dim +using parquet +as (select * from date_dim_text) +; +drop table if exists date_dim_text; + +drop table if exists household_demographics_text; +create table household_demographics_text +( + hd_demo_sk int, + hd_income_band_sk int, + hd_buy_potential string, + hd_dep_count int, + hd_vehicle_count int +) +USING 
csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/household_demographics.dat") +; +drop table if exists household_demographics; +create table household_demographics +using parquet +as (select * from household_demographics_text) +; +drop table if exists household_demographics_text; + +drop table if exists income_band_text; +create table income_band_text +( + ib_income_band_sk int, + ib_lower_bound int, + ib_upper_bound int +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/income_band.dat") +; +drop table if exists income_band; +create table income_band +using parquet +as (select * from income_band_text) +; +drop table if exists income_band_text; + +drop table if exists inventory_text; +create table inventory_text +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand bigint +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/inventory.dat") +; +drop table if exists inventory; +create table inventory +using parquet +as (select * from inventory_text) +; +drop table if exists inventory_text; + +drop table if exists item_text; +create table item_text +( + i_item_sk int, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/item.dat") +; +drop table if exists item; +create table item +using parquet +as (select * from item_text) +; +drop table if exists item_text; + +drop table if exists promotion_text; +create table promotion_text +( + p_promo_sk int, + p_promo_id string, + p_start_date_sk int, + p_end_date_sk int, + p_item_sk int, + p_cost double, + p_response_target int, + p_promo_name string, + p_channel_dmail string, + p_channel_email string, + p_channel_catalog string, + p_channel_tv string, + p_channel_radio string, + p_channel_press string, + p_channel_event string, + p_channel_demo string, + p_channel_details string, + p_purpose string, + p_discount_active string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/promotion.dat") +; +drop table if exists promotion; +create table promotion +using parquet +as (select * from promotion_text) +; +drop table if exists promotion_text; + +drop table if exists reason_text; +create table reason_text +( + r_reason_sk int, + r_reason_id string, + r_reason_desc string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/reason.dat") +; +drop table if exists reason; +create table reason +using parquet +as (select * from reason_text) +; +drop table if exists reason_text; + +drop table if exists ship_mode_text; +create table ship_mode_text +( + sm_ship_mode_sk int, + sm_ship_mode_id string, + sm_type string, + sm_code string, + sm_carrier string, + sm_contract string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/ship_mode.dat") +; +drop table if exists ship_mode; +create table ship_mode +using parquet +as (select * from ship_mode_text) +; +drop table if exists ship_mode_text; + +drop table if exists store_text; +create table store_text +( + s_store_sk int, + s_store_id string, + 
s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/store.dat") +; +drop table if exists store; +create table store +using parquet +as (select * from store_text) +; +drop table if exists store_text; + +drop table if exists store_returns_text; +create table store_returns_text +( + sr_returned_date_sk int, + sr_return_time_sk int, + sr_item_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_ticket_number int, + sr_return_quantity int, + sr_return_amt double, + sr_return_tax double, + sr_return_amt_inc_tax double, + sr_fee double, + sr_return_ship_cost double, + sr_refunded_cash double, + sr_reversed_charge double, + sr_store_credit double, + sr_net_loss double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/store_returns.dat") +; +drop table if exists store_returns; +create table store_returns +using parquet +as (select * from store_returns_text) +; +drop table if exists store_returns_text; + +drop table if exists store_sales_text; +create table store_sales_text +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/store_sales.dat") +; +drop table if exists store_sales; +create table store_sales +using parquet +as (select * from store_sales_text) +; +drop table if exists store_sales_text; + +drop table if exists time_dim_text; +create table time_dim_text +( + t_time_sk int, + t_time_id string, + t_time int, + t_hour int, + t_minute int, + t_second int, + t_am_pm string, + t_shift string, + t_sub_shift string, + t_meal_time string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/time_dim.dat") +; +drop table if exists time_dim; +create table time_dim +using parquet +as (select * from time_dim_text) +; +drop table if exists time_dim_text; + +drop table if exists warehouse_text; +create table warehouse_text +( + w_warehouse_sk int, + w_warehouse_id string, + w_warehouse_name string, + w_warehouse_sq_ft int, + w_street_number string, + w_street_name string, + w_street_type string, + w_suite_number string, + w_city string, + w_county string, + w_state string, + w_zip string, + w_country string, + w_gmt_offset double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/warehouse.dat") +; +drop table if exists warehouse; +create table warehouse 
+using parquet +as (select * from warehouse_text) +; +drop table if exists warehouse_text; + +drop table if exists web_page_text; +create table web_page_text +( + wp_web_page_sk int, + wp_web_page_id string, + wp_rec_start_date string, + wp_rec_end_date string, + wp_creation_date_sk int, + wp_access_date_sk int, + wp_autogen_flag string, + wp_customer_sk int, + wp_url string, + wp_type string, + wp_char_count int, + wp_link_count int, + wp_image_count int, + wp_max_ad_count int +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/web_page.dat") +; +drop table if exists web_page; +create table web_page +using parquet +as (select * from web_page_text) +; +drop table if exists web_page_text; + +drop table if exists web_returns_text; +create table web_returns_text +( + wr_returned_date_sk int, + wr_returned_time_sk int, + wr_item_sk int, + wr_refunded_customer_sk int, + wr_refunded_cdemo_sk int, + wr_refunded_hdemo_sk int, + wr_refunded_addr_sk int, + wr_returning_customer_sk int, + wr_returning_cdemo_sk int, + wr_returning_hdemo_sk int, + wr_returning_addr_sk int, + wr_web_page_sk int, + wr_reason_sk int, + wr_order_number int, + wr_return_quantity int, + wr_return_amt double, + wr_return_tax double, + wr_return_amt_inc_tax double, + wr_fee double, + wr_return_ship_cost double, + wr_refunded_cash double, + wr_reversed_charge double, + wr_account_credit double, + wr_net_loss double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/web_returns.dat") +; +drop table if exists web_returns; +create table web_returns +using parquet +as (select * from web_returns_text) +; +drop table if exists web_returns_text; + +drop table if exists web_sales_text; +create table web_sales_text +( + ws_sold_date_sk int, + ws_sold_time_sk int, + ws_ship_date_sk int, + ws_item_sk int, + ws_bill_customer_sk int, + ws_bill_cdemo_sk int, + ws_bill_hdemo_sk int, + ws_bill_addr_sk int, + ws_ship_customer_sk int, + ws_ship_cdemo_sk int, + ws_ship_hdemo_sk int, + ws_ship_addr_sk int, + ws_web_page_sk int, + ws_web_site_sk int, + ws_ship_mode_sk int, + ws_warehouse_sk int, + ws_promo_sk int, + ws_order_number int, + ws_quantity int, + ws_wholesale_cost double, + ws_list_price double, + ws_sales_price double, + ws_ext_discount_amt double, + ws_ext_sales_price double, + ws_ext_wholesale_cost double, + ws_ext_list_price double, + ws_ext_tax double, + ws_coupon_amt double, + ws_ext_ship_cost double, + ws_net_paid double, + ws_net_paid_inc_tax double, + ws_net_paid_inc_ship double, + ws_net_paid_inc_ship_tax double, + ws_net_profit double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/web_sales.dat") +; +drop table if exists web_sales; +create table web_sales +using parquet +as (select * from web_sales_text) +; +drop table if exists web_sales_text; + +drop table if exists web_site_text; +create table web_site_text +( + web_site_sk int, + web_site_id string, + web_rec_start_date string, + web_rec_end_date string, + web_name string, + web_open_date_sk int, + web_close_date_sk int, + web_class string, + web_manager string, + web_mkt_id int, + web_mkt_class string, + web_mkt_desc string, + web_market_manager string, + web_company_id int, + web_company_name string, + web_street_number string, + web_street_name string, + web_street_type string, + web_suite_number string, + web_city string, + web_county string, + web_state string, + web_zip string, + web_country string, + web_gmt_offset double, + web_tax_percentage double +) +USING csv 
+OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/web_site.dat") +; +drop table if exists web_site; +create table web_site +using parquet +as (select * from web_site_text) +; +drop table if exists web_site_text; diff --git a/tests/tpcdsbench/src/ddl/individual/call_center.sql b/tests/tpcdsbench/src/ddl/individual/call_center.sql new file mode 100644 index 000000000..be60ee27b --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/call_center.sql @@ -0,0 +1,44 @@ +drop table if exists call_center_text; +create table call_center_text +( + cc_call_center_sk int, + cc_call_center_id string, + cc_rec_start_date string, + cc_rec_end_date string, + cc_closed_date_sk int, + cc_open_date_sk int, + cc_name string, + cc_class string, + cc_employees int, + cc_sq_ft int, + cc_hours string, + cc_manager string, + cc_mkt_id int, + cc_mkt_class string, + cc_mkt_desc string, + cc_market_manager string, + cc_division int, + cc_division_name string, + cc_company int, + cc_company_name string, + cc_street_number string, + cc_street_name string, + cc_street_type string, + cc_suite_number string, + cc_city string, + cc_county string, + cc_state string, + cc_zip string, + cc_country string, + cc_gmt_offset double, + cc_tax_percentage double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/call_center") +; +drop table if exists call_center; +create table call_center +using parquet +as (select * from call_center_text) +; +drop table if exists call_center_text; diff --git a/tests/tpcdsbench/src/ddl/individual/catalog_page.sql b/tests/tpcdsbench/src/ddl/individual/catalog_page.sql new file mode 100644 index 000000000..7fd842482 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/catalog_page.sql @@ -0,0 +1,22 @@ +drop table if exists catalog_page_text; +create table catalog_page_text +( + cp_catalog_page_sk int, + cp_catalog_page_id string, + cp_start_date_sk int, + cp_end_date_sk int, + cp_department string, + cp_catalog_number int, + cp_catalog_page_number int, + cp_description string, + cp_type string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/catalog_page") +; +drop table if exists catalog_page; +create table catalog_page +using parquet +as (select * from catalog_page_text) +; +drop table if exists catalog_page_text; diff --git a/tests/tpcdsbench/src/ddl/individual/catalog_returns.sql b/tests/tpcdsbench/src/ddl/individual/catalog_returns.sql new file mode 100644 index 000000000..c50565824 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/catalog_returns.sql @@ -0,0 +1,40 @@ +drop table if exists catalog_returns_text; +create table catalog_returns_text +( + cr_returned_date_sk int, + cr_returned_time_sk int, + cr_item_sk int, + cr_refunded_customer_sk int, + cr_refunded_cdemo_sk int, + cr_refunded_hdemo_sk int, + cr_refunded_addr_sk int, + cr_returning_customer_sk int, + cr_returning_cdemo_sk int, + cr_returning_hdemo_sk int, + cr_returning_addr_sk int, + cr_call_center_sk int, + cr_catalog_page_sk int, + cr_ship_mode_sk int, + cr_warehouse_sk int, + cr_reason_sk int, + cr_order_number int, + cr_return_quantity int, + cr_return_amount double, + cr_return_tax double, + cr_return_amt_inc_tax double, + cr_fee double, + cr_return_ship_cost double, + cr_refunded_cash double, + cr_reversed_charge double, + cr_store_credit double, + cr_net_loss double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/catalog_returns") +; +drop table if exists catalog_returns; +create table catalog_returns 
+using parquet +as (select * from catalog_returns_text) +; +drop table if exists catalog_returns_text; diff --git a/tests/tpcdsbench/src/ddl/individual/catalog_sales.sql b/tests/tpcdsbench/src/ddl/individual/catalog_sales.sql new file mode 100644 index 000000000..1d208df11 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/catalog_sales.sql @@ -0,0 +1,47 @@ +drop table if exists catalog_sales_text; +create table catalog_sales_text +( + cs_sold_date_sk int, + cs_sold_time_sk int, + cs_ship_date_sk int, + cs_bill_customer_sk int, + cs_bill_cdemo_sk int, + cs_bill_hdemo_sk int, + cs_bill_addr_sk int, + cs_ship_customer_sk int, + cs_ship_cdemo_sk int, + cs_ship_hdemo_sk int, + cs_ship_addr_sk int, + cs_call_center_sk int, + cs_catalog_page_sk int, + cs_ship_mode_sk int, + cs_warehouse_sk int, + cs_item_sk int, + cs_promo_sk int, + cs_order_number int, + cs_quantity int, + cs_wholesale_cost double, + cs_list_price double, + cs_sales_price double, + cs_ext_discount_amt double, + cs_ext_sales_price double, + cs_ext_wholesale_cost double, + cs_ext_list_price double, + cs_ext_tax double, + cs_coupon_amt double, + cs_ext_ship_cost double, + cs_net_paid double, + cs_net_paid_inc_tax double, + cs_net_paid_inc_ship double, + cs_net_paid_inc_ship_tax double, + cs_net_profit double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/catalog_sales") +; +drop table if exists catalog_sales; +create table catalog_sales +using parquet +as (select * from catalog_sales_text) +; +drop table if exists catalog_sales_text; diff --git a/tests/tpcdsbench/src/ddl/individual/customer.sql b/tests/tpcdsbench/src/ddl/individual/customer.sql new file mode 100644 index 000000000..6f4634c7e --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/customer.sql @@ -0,0 +1,31 @@ +drop table if exists customer_text; +create table customer_text +( + c_customer_sk int, + c_customer_id string, + c_current_cdemo_sk int, + c_current_hdemo_sk int, + c_current_addr_sk int, + c_first_shipto_date_sk int, + c_first_sales_date_sk int, + c_salutation string, + c_first_name string, + c_last_name string, + c_preferred_cust_flag string, + c_birth_day int, + c_birth_month int, + c_birth_year int, + c_birth_country string, + c_login string, + c_email_address string, + c_last_review_date string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/customer") +; +drop table if exists customer; +create table customer +using parquet +as (select * from customer_text) +; +drop table if exists customer_text; diff --git a/tests/tpcdsbench/src/ddl/individual/customer_address.sql b/tests/tpcdsbench/src/ddl/individual/customer_address.sql new file mode 100644 index 000000000..1edf4b0c0 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/customer_address.sql @@ -0,0 +1,26 @@ +drop table if exists customer_address_text; +create table customer_address_text +( + ca_address_sk int, + ca_address_id string, + ca_street_number string, + ca_street_name string, + ca_street_type string, + ca_suite_number string, + ca_city string, + ca_county string, + ca_state string, + ca_zip string, + ca_country string, + ca_gmt_offset double, + ca_location_type string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/customer_address") +; +drop table if exists customer_address; +create table customer_address +using parquet +as (select * from customer_address_text) +; +drop table if exists customer_address_text; diff --git a/tests/tpcdsbench/src/ddl/individual/customer_demographics.sql 
b/tests/tpcdsbench/src/ddl/individual/customer_demographics.sql new file mode 100644 index 000000000..d62700ac8 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/customer_demographics.sql @@ -0,0 +1,22 @@ +drop table if exists customer_demographics_text; +create table customer_demographics_text +( + cd_demo_sk int, + cd_gender string, + cd_marital_status string, + cd_education_status string, + cd_purchase_estimate int, + cd_credit_rating string, + cd_dep_count int, + cd_dep_employed_count int, + cd_dep_college_count int +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/customer_demographics") +; +drop table if exists customer_demographics; +create table customer_demographics +using parquet +as (select * from customer_demographics_text) +; +drop table if exists customer_demographics_text; diff --git a/tests/tpcdsbench/src/ddl/individual/date_dim.sql b/tests/tpcdsbench/src/ddl/individual/date_dim.sql new file mode 100644 index 000000000..82ec4dad0 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/date_dim.sql @@ -0,0 +1,41 @@ +drop table if exists date_dim_text; +create table date_dim_text +( + d_date_sk int, + d_date_id string, + d_date string, + d_month_seq int, + d_week_seq int, + d_quarter_seq int, + d_year int, + d_dow int, + d_moy int, + d_dom int, + d_qoy int, + d_fy_year int, + d_fy_quarter_seq int, + d_fy_week_seq int, + d_day_name string, + d_quarter_name string, + d_holiday string, + d_weekend string, + d_following_holiday string, + d_first_dom int, + d_last_dom int, + d_same_day_ly int, + d_same_day_lq int, + d_current_day string, + d_current_week string, + d_current_month string, + d_current_quarter string, + d_current_year string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/date_dim") +; +drop table if exists date_dim; +create table date_dim +using parquet +as (select * from date_dim_text) +; +drop table if exists date_dim_text; diff --git a/tests/tpcdsbench/src/ddl/individual/household_demographics.sql b/tests/tpcdsbench/src/ddl/individual/household_demographics.sql new file mode 100644 index 000000000..47c11f72c --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/household_demographics.sql @@ -0,0 +1,18 @@ +drop table if exists household_demographics_text; +create table household_demographics_text +( + hd_demo_sk int, + hd_income_band_sk int, + hd_buy_potential string, + hd_dep_count int, + hd_vehicle_count int +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/household_demographics") +; +drop table if exists household_demographics; +create table household_demographics +using parquet +as (select * from household_demographics_text) +; +drop table if exists household_demographics_text; diff --git a/tests/tpcdsbench/src/ddl/individual/income_band.sql b/tests/tpcdsbench/src/ddl/individual/income_band.sql new file mode 100644 index 000000000..695f8d488 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/income_band.sql @@ -0,0 +1,16 @@ +drop table if exists income_band_text; +create table income_band_text +( + ib_income_band_sk int, + ib_lower_bound int, + ib_upper_bound int +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/income_band") +; +drop table if exists income_band; +create table income_band +using parquet +as (select * from income_band_text) +; +drop table if exists income_band_text; diff --git a/tests/tpcdsbench/src/ddl/individual/inventory.sql b/tests/tpcdsbench/src/ddl/individual/inventory.sql new file mode 100644 index 
000000000..db2447905 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/inventory.sql @@ -0,0 +1,17 @@ +drop table if exists inventory_text; +create table inventory_text +( + inv_date_sk int, + inv_item_sk int, + inv_warehouse_sk int, + inv_quantity_on_hand bigint +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/inventory") +; +drop table if exists inventory; +create table inventory +using parquet +as (select * from inventory_text) +; +drop table if exists inventory_text; diff --git a/tests/tpcdsbench/src/ddl/individual/item.sql b/tests/tpcdsbench/src/ddl/individual/item.sql new file mode 100644 index 000000000..d5fb49e08 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/item.sql @@ -0,0 +1,35 @@ +drop table if exists item_text; +create table item_text +( + i_item_sk int, + i_item_id string, + i_rec_start_date string, + i_rec_end_date string, + i_item_desc string, + i_current_price double, + i_wholesale_cost double, + i_brand_id int, + i_brand string, + i_class_id int, + i_class string, + i_category_id int, + i_category string, + i_manufact_id int, + i_manufact string, + i_size string, + i_formulation string, + i_color string, + i_units string, + i_container string, + i_manager_id int, + i_product_name string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/item") +; +drop table if exists item; +create table item +using parquet +as (select * from item_text) +; +drop table if exists item_text; diff --git a/tests/tpcdsbench/src/ddl/individual/promotion.sql b/tests/tpcdsbench/src/ddl/individual/promotion.sql new file mode 100644 index 000000000..b8ebf4ffd --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/promotion.sql @@ -0,0 +1,31 @@ +create table promotion_text +( + p_promo_sk int, + p_promo_id string, + p_start_date_sk int, + p_end_date_sk int, + p_item_sk int, + p_cost double, + p_response_target int, + p_promo_name string, + p_channel_dmail string, + p_channel_email string, + p_channel_catalog string, + p_channel_tv string, + p_channel_radio string, + p_channel_press string, + p_channel_event string, + p_channel_demo string, + p_channel_details string, + p_purpose string, + p_discount_active string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/promotion") +; +drop table if exists promotion; +create table promotion +using parquet +as (select * from promotion_text) +; +drop table if exists promotion_text; diff --git a/tests/tpcdsbench/src/ddl/individual/reason.sql b/tests/tpcdsbench/src/ddl/individual/reason.sql new file mode 100644 index 000000000..232c724b3 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/reason.sql @@ -0,0 +1,16 @@ +drop table if exists reason_text; +create table reason_text +( + r_reason_sk int, + r_reason_id string, + r_reason_desc string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/reason") +; +drop table if exists reason; +create table reason +using parquet +as (select * from reason_text) +; +drop table if exists reason_text; diff --git a/tests/tpcdsbench/src/ddl/individual/ship_mode.sql b/tests/tpcdsbench/src/ddl/individual/ship_mode.sql new file mode 100644 index 000000000..9a962d07d --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/ship_mode.sql @@ -0,0 +1,19 @@ +drop table if exists ship_mode_text; +create table ship_mode_text +( + sm_ship_mode_sk int, + sm_ship_mode_id string, + sm_type string, + sm_code string, + sm_carrier string, + sm_contract string +) +USING csv +OPTIONS(header "false", 
delimiter "|", path "${TPCDS_GENDATA_DIR}/ship_mode") +; +drop table if exists ship_mode; +create table ship_mode +using parquet +as (select * from ship_mode_text) +; +drop table if exists ship_mode_text; diff --git a/tests/tpcdsbench/src/ddl/individual/store.sql b/tests/tpcdsbench/src/ddl/individual/store.sql new file mode 100644 index 000000000..067e90129 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/store.sql @@ -0,0 +1,42 @@ +drop table if exists store_text; +create table store_text +( + s_store_sk int, + s_store_id string, + s_rec_start_date string, + s_rec_end_date string, + s_closed_date_sk int, + s_store_name string, + s_number_employees int, + s_floor_space int, + s_hours string, + s_manager string, + s_market_id int, + s_geography_class string, + s_market_desc string, + s_market_manager string, + s_division_id int, + s_division_name string, + s_company_id int, + s_company_name string, + s_street_number string, + s_street_name string, + s_street_type string, + s_suite_number string, + s_city string, + s_county string, + s_state string, + s_zip string, + s_country string, + s_gmt_offset double, + s_tax_precentage double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/store") +; +drop table if exists store; +create table store +using parquet +as (select * from store_text) +; +drop table if exists store_text; diff --git a/tests/tpcdsbench/src/ddl/individual/store_returns.sql b/tests/tpcdsbench/src/ddl/individual/store_returns.sql new file mode 100644 index 000000000..4ed8d9228 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/store_returns.sql @@ -0,0 +1,33 @@ +drop table if exists store_returns_text; +create table store_returns_text +( + sr_returned_date_sk int, + sr_return_time_sk int, + sr_item_sk int, + sr_customer_sk int, + sr_cdemo_sk int, + sr_hdemo_sk int, + sr_addr_sk int, + sr_store_sk int, + sr_reason_sk int, + sr_ticket_number int, + sr_return_quantity int, + sr_return_amt double, + sr_return_tax double, + sr_return_amt_inc_tax double, + sr_fee double, + sr_return_ship_cost double, + sr_refunded_cash double, + sr_reversed_charge double, + sr_store_credit double, + sr_net_loss double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/store_returns") +; +drop table if exists store_returns; +create table store_returns +using parquet +as (select * from store_returns_text) +; +drop table if exists store_returns_text; diff --git a/tests/tpcdsbench/src/ddl/individual/store_sales.sql b/tests/tpcdsbench/src/ddl/individual/store_sales.sql new file mode 100644 index 000000000..38cb56866 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/store_sales.sql @@ -0,0 +1,36 @@ +drop table if exists store_sales_text; +create table store_sales_text +( + ss_sold_date_sk int, + ss_sold_time_sk int, + ss_item_sk int, + ss_customer_sk int, + ss_cdemo_sk int, + ss_hdemo_sk int, + ss_addr_sk int, + ss_store_sk int, + ss_promo_sk int, + ss_ticket_number int, + ss_quantity int, + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/store_sales") +; +drop table if exists store_sales; +create table store_sales +using parquet +as (select * from store_sales_text) +; +drop table if 
exists store_sales_text; diff --git a/tests/tpcdsbench/src/ddl/individual/time_dim.sql b/tests/tpcdsbench/src/ddl/individual/time_dim.sql new file mode 100644 index 000000000..afe9ac405 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/time_dim.sql @@ -0,0 +1,23 @@ +drop table if exists time_dim_text; +create table time_dim_text +( + t_time_sk int, + t_time_id string, + t_time int, + t_hour int, + t_minute int, + t_second int, + t_am_pm string, + t_shift string, + t_sub_shift string, + t_meal_time string +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/time_dim") +; +drop table if exists time_dim; +create table time_dim +using parquet +as (select * from time_dim_text) +; +drop table if exists time_dim_text; diff --git a/tests/tpcdsbench/src/ddl/individual/warehouse.sql b/tests/tpcdsbench/src/ddl/individual/warehouse.sql new file mode 100644 index 000000000..865855c65 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/warehouse.sql @@ -0,0 +1,27 @@ +drop table if exists warehouse_text; +create table warehouse_text +( + w_warehouse_sk int, + w_warehouse_id string, + w_warehouse_name string, + w_warehouse_sq_ft int, + w_street_number string, + w_street_name string, + w_street_type string, + w_suite_number string, + w_city string, + w_county string, + w_state string, + w_zip string, + w_country string, + w_gmt_offset double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/warehouse") +; +drop table if exists warehouse; +create table warehouse +using parquet +as (select * from warehouse_text) +; +drop table if exists warehouse_text; diff --git a/tests/tpcdsbench/src/ddl/individual/web_page.sql b/tests/tpcdsbench/src/ddl/individual/web_page.sql new file mode 100644 index 000000000..1f0b8f933 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/web_page.sql @@ -0,0 +1,27 @@ +drop table if exists web_page_text; +create table web_page_text +( + wp_web_page_sk int, + wp_web_page_id string, + wp_rec_start_date string, + wp_rec_end_date string, + wp_creation_date_sk int, + wp_access_date_sk int, + wp_autogen_flag string, + wp_customer_sk int, + wp_url string, + wp_type string, + wp_char_count int, + wp_link_count int, + wp_image_count int, + wp_max_ad_count int +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/web_page") +; +drop table if exists web_page; +create table web_page +using parquet +as (select * from web_page_text) +; +drop table if exists web_page_text; diff --git a/tests/tpcdsbench/src/ddl/individual/web_returns.sql b/tests/tpcdsbench/src/ddl/individual/web_returns.sql new file mode 100644 index 000000000..280a1e5fc --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/web_returns.sql @@ -0,0 +1,37 @@ +drop table if exists web_returns_text; +create table web_returns_text +( + wr_returned_date_sk int, + wr_returned_time_sk int, + wr_item_sk int, + wr_refunded_customer_sk int, + wr_refunded_cdemo_sk int, + wr_refunded_hdemo_sk int, + wr_refunded_addr_sk int, + wr_returning_customer_sk int, + wr_returning_cdemo_sk int, + wr_returning_hdemo_sk int, + wr_returning_addr_sk int, + wr_web_page_sk int, + wr_reason_sk int, + wr_order_number int, + wr_return_quantity int, + wr_return_amt double, + wr_return_tax double, + wr_return_amt_inc_tax double, + wr_fee double, + wr_return_ship_cost double, + wr_refunded_cash double, + wr_reversed_charge double, + wr_account_credit double, + wr_net_loss double +) +USING csv +OPTIONS(header "false", delimiter "|", path 
"${TPCDS_GENDATA_DIR}/web_returns") +; +drop table if exists web_returns; +create table web_returns +using parquet +as (select * from web_returns_text) +; +drop table if exists web_returns_text; diff --git a/tests/tpcdsbench/src/ddl/individual/web_sales.sql b/tests/tpcdsbench/src/ddl/individual/web_sales.sql new file mode 100644 index 000000000..f00744a11 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/web_sales.sql @@ -0,0 +1,47 @@ +drop table if exists web_sales_text; +create table web_sales_text +( + ws_sold_date_sk int, + ws_sold_time_sk int, + ws_ship_date_sk int, + ws_item_sk int, + ws_bill_customer_sk int, + ws_bill_cdemo_sk int, + ws_bill_hdemo_sk int, + ws_bill_addr_sk int, + ws_ship_customer_sk int, + ws_ship_cdemo_sk int, + ws_ship_hdemo_sk int, + ws_ship_addr_sk int, + ws_web_page_sk int, + ws_web_site_sk int, + ws_ship_mode_sk int, + ws_warehouse_sk int, + ws_promo_sk int, + ws_order_number int, + ws_quantity int, + ws_wholesale_cost double, + ws_list_price double, + ws_sales_price double, + ws_ext_discount_amt double, + ws_ext_sales_price double, + ws_ext_wholesale_cost double, + ws_ext_list_price double, + ws_ext_tax double, + ws_coupon_amt double, + ws_ext_ship_cost double, + ws_net_paid double, + ws_net_paid_inc_tax double, + ws_net_paid_inc_ship double, + ws_net_paid_inc_ship_tax double, + ws_net_profit double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/web_sales") +; +drop table if exists web_sales; +create table web_sales +using parquet +as (select * from web_sales_text) +; +drop table if exists web_sales_text; diff --git a/tests/tpcdsbench/src/ddl/individual/web_site.sql b/tests/tpcdsbench/src/ddl/individual/web_site.sql new file mode 100644 index 000000000..2b841f816 --- /dev/null +++ b/tests/tpcdsbench/src/ddl/individual/web_site.sql @@ -0,0 +1,39 @@ +drop table if exists web_site_text; +create table web_site_text +( + web_site_sk int, + web_site_id string, + web_rec_start_date string, + web_rec_end_date string, + web_name string, + web_open_date_sk int, + web_close_date_sk int, + web_class string, + web_manager string, + web_mkt_id int, + web_mkt_class string, + web_mkt_desc string, + web_market_manager string, + web_company_id int, + web_company_name string, + web_street_number string, + web_street_name string, + web_street_type string, + web_suite_number string, + web_city string, + web_county string, + web_state string, + web_zip string, + web_country string, + web_gmt_offset double, + web_tax_percentage double +) +USING csv +OPTIONS(header "false", delimiter "|", path "${TPCDS_GENDATA_DIR}/web_site") +; +drop table if exists web_site; +create table web_site +using parquet +as (select * from web_site_text) +; +drop table if exists web_site_text; diff --git a/tests/tpcdsbench/src/ddl/row_counts.sql b/tests/tpcdsbench/src/ddl/row_counts.sql new file mode 100644 index 000000000..a09bde26d --- /dev/null +++ b/tests/tpcdsbench/src/ddl/row_counts.sql @@ -0,0 +1,36 @@ +------------------------------------------------------------------------------ +-- Licensed Materials - Property of IBM +-- +-- (C) COPYRIGHT International Business Machines Corp. 2017 +-- All Rights Reserved. +-- +-- US Government Users Restricted Rights - Use, duplication or +-- disclosure restricted by GSA ADP Schedule Contract with IBM Corp. 
+------------------------------------------------------------------------------ + +USE ${TPCDS_DBNAME}; + +select count(*) from call_center; +select count(*) from catalog_page; +select count(*) from catalog_returns; +select count(*) from catalog_sales; +select count(*) from customer; +select count(*) from customer_address; +select count(*) from customer_demographics; +select count(*) from date_dim; +select count(*) from household_demographics; +select count(*) from income_band; +select count(*) from inventory; +select count(*) from item; +select count(*) from promotion; +select count(*) from reason; +select count(*) from ship_mode; +select count(*) from store; +select count(*) from store_returns; +select count(*) from store_sales; +select count(*) from time_dim; +select count(*) from warehouse; +select count(*) from web_page; +select count(*) from web_returns; +select count(*) from web_sales; +select count(*) from web_site; diff --git a/tests/tpcdsbench/src/ddl/rowcounts.expected b/tests/tpcdsbench/src/ddl/rowcounts.expected new file mode 100644 index 000000000..7f999a88d --- /dev/null +++ b/tests/tpcdsbench/src/ddl/rowcounts.expected @@ -0,0 +1,24 @@ +6 +11718 +144067 +1441548 +100000 +50000 +1920800 +73049 +7200 +20 +11745000 +18000 +300 +35 +20 +12 +287514 +2880404 +86400 +5 +60 +71763 +719384 +30 diff --git a/tests/tpcdsbench/src/properties/log4j.properties b/tests/tpcdsbench/src/properties/log4j.properties new file mode 100644 index 000000000..fb94db530 --- /dev/null +++ b/tests/tpcdsbench/src/properties/log4j.properties @@ -0,0 +1,47 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set everything to be logged to the console +###Custom log file +log4j.rootCategory=INFO, file +log4j.appender.file=org.apache.log4j.RollingFileAppender +log4j.appender.file.File=${TPCDS_LOG_DIR}/spark-tpcds.log +log4j.appender.file.ImmediateFlush=true +## Set the append to false, overwrite +log4j.appender.file.Append=false +log4j.appender.file.MaxFileSize=100MB +log4j.appender.file.MaxBackupIndex=10 +##Define the layout for file appender +log4j.appender.file.layout=org.apache.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1}:%L - %m%n + +# Set the default spark-shell log level to WARN. When running the spark-shell, the +# log level for this class is used to overwrite the root logger's log level, so that +# the user can have different defaults for the shell and regular Spark apps. 
+log4j.logger.org.apache.spark.repl.Main=WARN + +# Settings to quiet third party logs that are too verbose +log4j.logger.org.spark_project.jetty=WARN +log4j.logger.org.spark_project.jetty.util.component.AbstractLifeCycle=ERROR +log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO +log4j.logger.org.apache.parquet=ERROR +log4j.logger.parquet=ERROR + +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support +log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL +log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR diff --git a/tests/tpcdsbench/src/queries/query01.sql b/tests/tpcdsbench/src/queries/query01.sql new file mode 100644 index 000000000..383e14c90 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query01.sql @@ -0,0 +1,24 @@ +-- start query 1 in stream 0 using template query1.tpl and seed QUALIFICATION +with customer_total_return as +(select sr_customer_sk as ctr_customer_sk +,sr_store_sk as ctr_store_sk +,sum(SR_RETURN_AMT) as ctr_total_return +from store_returns +,date_dim +where sr_returned_date_sk = d_date_sk +and d_year =2000 +group by sr_customer_sk +,sr_store_sk) + select c_customer_id +from customer_total_return ctr1 +,store +,customer +where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 +from customer_total_return ctr2 +where ctr1.ctr_store_sk = ctr2.ctr_store_sk) +and s_store_sk = ctr1.ctr_store_sk +and s_state = 'TN' +and ctr1.ctr_customer_sk = c_customer_sk +order by c_customer_id + limit 100; +-- end query 1 in stream 0 using template query1.tpl diff --git a/tests/tpcdsbench/src/queries/query02.sql b/tests/tpcdsbench/src/queries/query02.sql new file mode 100644 index 000000000..dafc62f49 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query02.sql @@ -0,0 +1,59 @@ +-- start query 2 in stream 0 using template query2.tpl and seed QUALIFICATION + with wscs as + (select sold_date_sk + ,sales_price + from (select ws_sold_date_sk sold_date_sk + ,ws_ext_sales_price sales_price + from web_sales + union all + select cs_sold_date_sk sold_date_sk + ,cs_ext_sales_price sales_price + from catalog_sales) x ), + wswscs as + (select d_week_seq, + sum(case when (d_day_name='Sunday') then sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then sales_price else null end) sat_sales + from wscs + ,date_dim + where d_date_sk = sold_date_sk + group by d_week_seq) + select d_week_seq1 + ,round(sun_sales1/sun_sales2,2) + ,round(mon_sales1/mon_sales2,2) + ,round(tue_sales1/tue_sales2,2) + ,round(wed_sales1/wed_sales2,2) + ,round(thu_sales1/thu_sales2,2) + ,round(fri_sales1/fri_sales2,2) + ,round(sat_sales1/sat_sales2,2) + from + (select wswscs.d_week_seq d_week_seq1 + ,sun_sales sun_sales1 + ,mon_sales mon_sales1 + ,tue_sales tue_sales1 + ,wed_sales wed_sales1 + ,thu_sales thu_sales1 + ,fri_sales fri_sales1 + ,sat_sales sat_sales1 + from wswscs,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001) y, + (select wswscs.d_week_seq d_week_seq2 + ,sun_sales sun_sales2 + 
,mon_sales mon_sales2 + ,tue_sales tue_sales2 + ,wed_sales wed_sales2 + ,thu_sales thu_sales2 + ,fri_sales fri_sales2 + ,sat_sales sat_sales2 + from wswscs + ,date_dim + where date_dim.d_week_seq = wswscs.d_week_seq and + d_year = 2001+1) z + where d_week_seq1=d_week_seq2-53 + order by d_week_seq1; +-- end query 2 in stream 0 using template query2.tpl diff --git a/tests/tpcdsbench/src/queries/query03.sql b/tests/tpcdsbench/src/queries/query03.sql new file mode 100644 index 000000000..ada1d1b15 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query03.sql @@ -0,0 +1,20 @@ +-- start query 3 in stream 0 using template query3.tpl and seed QUALIFICATION + select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) sum_agg + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manufact_id = 128 + and dt.d_moy=11 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,sum_agg desc + ,brand_id + limit 100; +-- end query 3 in stream 0 using template query3.tpl diff --git a/tests/tpcdsbench/src/queries/query04.sql b/tests/tpcdsbench/src/queries/query04.sql new file mode 100644 index 000000000..76b528c9c --- /dev/null +++ b/tests/tpcdsbench/src/queries/query04.sql @@ -0,0 +1,115 @@ +-- start query 4 in stream 0 using template query4.tpl and seed QUALIFICATION +with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(((ss_ext_list_price-ss_ext_wholesale_cost-ss_ext_discount_amt)+ss_ext_sales_price)/2) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((cs_ext_list_price-cs_ext_wholesale_cost-cs_ext_discount_amt)+cs_ext_sales_price)/2) ) year_total + ,'c' sale_type + from customer + ,catalog_sales + ,date_dim + where c_customer_sk = cs_bill_customer_sk + and cs_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year +union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum((((ws_ext_list_price-ws_ext_wholesale_cost-ws_ext_discount_amt)+ws_ext_sales_price)/2) ) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + 
,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_c_firstyear + ,year_total t_c_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_c_secyear.customer_id + and t_s_firstyear.customer_id = t_c_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_c_firstyear.sale_type = 'c' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_c_secyear.sale_type = 'c' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_c_firstyear.dyear = 2001 + and t_c_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_c_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + and case when t_c_firstyear.year_total > 0 then t_c_secyear.year_total / t_c_firstyear.year_total else null end + > case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag + limit 100; +-- end query 4 in stream 0 using template query4.tpl diff --git a/tests/tpcdsbench/src/queries/query05.sql b/tests/tpcdsbench/src/queries/query05.sql new file mode 100644 index 000000000..89729a3e6 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query05.sql @@ -0,0 +1,127 @@ +-- start query 5 in stream 0 using template query5.tpl and seed QUALIFICATION + with ssr as + (select s_store_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ss_store_sk as store_sk, + ss_sold_date_sk as date_sk, + ss_ext_sales_price as sales_price, + ss_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from store_sales + union all + select sr_store_sk as store_sk, + sr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + sr_return_amt as return_amt, + sr_net_loss as net_loss + from store_returns + ) salesreturns, + date_dim, + store + where date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 14 ) + and store_sk = s_store_sk + group by s_store_id) + , + csr as + (select cp_catalog_page_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select cs_catalog_page_sk as page_sk, + cs_sold_date_sk as date_sk, + cs_ext_sales_price as sales_price, + cs_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from catalog_sales + union all + select cr_catalog_page_sk as page_sk, + cr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + cr_return_amount as return_amt, + cr_net_loss as net_loss + from catalog_returns + ) salesreturns, + date_dim, + catalog_page + 
where date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 14 ) + and page_sk = cp_catalog_page_sk + group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(sales_price) as sales, + sum(profit) as profit, + sum(return_amt) as returns, + sum(net_loss) as profit_loss + from + ( select ws_web_site_sk as wsr_web_site_sk, + ws_sold_date_sk as date_sk, + ws_ext_sales_price as sales_price, + ws_net_profit as profit, + cast(0 as decimal(7,2)) as return_amt, + cast(0 as decimal(7,2)) as net_loss + from web_sales + union all + select ws_web_site_sk as wsr_web_site_sk, + wr_returned_date_sk as date_sk, + cast(0 as decimal(7,2)) as sales_price, + cast(0 as decimal(7,2)) as profit, + wr_return_amt as return_amt, + wr_net_loss as net_loss + from web_returns left outer join web_sales on + ( wr_item_sk = ws_item_sk + and wr_order_number = ws_order_number) + ) salesreturns, + date_dim, + web_site + where date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 14 ) + and wsr_web_site_sk = web_site_sk + group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , concat('store', s_store_id) as id + , sales + , returns + , (profit - profit_loss) as profit + from ssr + union all + select 'catalog channel' as channel + , concat('catalog_page', cp_catalog_page_id) as id + , sales + , returns + , (profit - profit_loss) as profit + from csr + union all + select 'web channel' as channel + , concat('web_site', web_site_id) as id + , sales + , returns + , (profit - profit_loss) as profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100; +-- end query 5 in stream 0 using template query5.tpl diff --git a/tests/tpcdsbench/src/queries/query06.sql b/tests/tpcdsbench/src/queries/query06.sql new file mode 100644 index 000000000..6cf0d5984 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query06.sql @@ -0,0 +1,25 @@ +-- start query 6 in stream 0 using template query6.tpl and seed QUALIFICATION + select a.ca_state state, count(*) cnt + from customer_address a + ,customer c + ,store_sales s + ,date_dim d + ,item i + where a.ca_address_sk = c.c_current_addr_sk + and c.c_customer_sk = s.ss_customer_sk + and s.ss_sold_date_sk = d.d_date_sk + and s.ss_item_sk = i.i_item_sk + and d.d_month_seq = + (select distinct (d_month_seq) + from date_dim + where d_year = 2001 + and d_moy = 1 ) + and i.i_current_price > 1.2 * + (select avg(j.i_current_price) + from item j + where j.i_category = i.i_category) + group by a.ca_state + having count(*) >= 10 + order by cnt + limit 100; +-- end query 6 in stream 0 using template query6.tpl diff --git a/tests/tpcdsbench/src/queries/query07.sql b/tests/tpcdsbench/src/queries/query07.sql new file mode 100644 index 000000000..af4b7d9f7 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query07.sql @@ -0,0 +1,20 @@ +-- start query 7 in stream 0 using template query7.tpl and seed QUALIFICATION + select i_item_id, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, item, promotion + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_cdemo_sk = cd_demo_sk and + ss_promo_sk = p_promo_sk and + cd_gender = 'M' and + cd_marital_status = 'S' and + cd_education_status = 'College' and + (p_channel_email = 'N' or 
p_channel_event = 'N') and + d_year = 2000 + group by i_item_id + order by i_item_id + limit 100; +-- end query 7 in stream 0 using template query7.tpl diff --git a/tests/tpcdsbench/src/queries/query08.sql b/tests/tpcdsbench/src/queries/query08.sql new file mode 100644 index 000000000..bd3f4ed06 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query08.sql @@ -0,0 +1,107 @@ +-- start query 8 in stream 0 using template query8.tpl and seed QUALIFICATION + select s_store_name + ,sum(ss_net_profit) + from store_sales + ,date_dim + ,store, + (select ca_zip + from ( + SELECT substr(ca_zip,1,5) ca_zip + FROM customer_address + WHERE substr(ca_zip,1,5) IN ( + '24128','76232','65084','87816','83926','77556', + '20548','26231','43848','15126','91137', + '61265','98294','25782','17920','18426', + '98235','40081','84093','28577','55565', + '17183','54601','67897','22752','86284', + '18376','38607','45200','21756','29741', + '96765','23932','89360','29839','25989', + '28898','91068','72550','10390','18845', + '47770','82636','41367','76638','86198', + '81312','37126','39192','88424','72175', + '81426','53672','10445','42666','66864', + '66708','41248','48583','82276','18842', + '78890','49448','14089','38122','34425', + '79077','19849','43285','39861','66162', + '77610','13695','99543','83444','83041', + '12305','57665','68341','25003','57834', + '62878','49130','81096','18840','27700', + '23470','50412','21195','16021','76107', + '71954','68309','18119','98359','64544', + '10336','86379','27068','39736','98569', + '28915','24206','56529','57647','54917', + '42961','91110','63981','14922','36420', + '23006','67467','32754','30903','20260', + '31671','51798','72325','85816','68621', + '13955','36446','41766','68806','16725', + '15146','22744','35850','88086','51649', + '18270','52867','39972','96976','63792', + '11376','94898','13595','10516','90225', + '58943','39371','94945','28587','96576', + '57855','28488','26105','83933','25858', + '34322','44438','73171','30122','34102', + '22685','71256','78451','54364','13354', + '45375','40558','56458','28286','45266', + '47305','69399','83921','26233','11101', + '15371','69913','35942','15882','25631', + '24610','44165','99076','33786','70738', + '26653','14328','72305','62496','22152', + '10144','64147','48425','14663','21076', + '18799','30450','63089','81019','68893', + '24996','51200','51211','45692','92712', + '70466','79994','22437','25280','38935', + '71791','73134','56571','14060','19505', + '72425','56575','74351','68786','51650', + '20004','18383','76614','11634','18906', + '15765','41368','73241','76698','78567', + '97189','28545','76231','75691','22246', + '51061','90578','56691','68014','51103', + '94167','57047','14867','73520','15734', + '63435','25733','35474','24676','94627', + '53535','17879','15559','53268','59166', + '11928','59402','33282','45721','43933', + '68101','33515','36634','71286','19736', + '58058','55253','67473','41918','19515', + '36495','19430','22351','77191','91393', + '49156','50298','87501','18652','53179', + '18767','63193','23968','65164','68880', + '21286','72823','58470','67301','13394', + '31016','70372','67030','40604','24317', + '45748','39127','26065','77721','31029', + '31880','60576','24671','45549','13376', + '50016','33123','19769','22927','97789', + '46081','72151','15723','46136','51949', + '68100','96888','64528','14171','79777', + '28709','11489','25103','32213','78668', + '22245','15798','27156','37930','62971', + '21337','51622','67853','10567','38415', + '15455','58263','42029','60279','37125', + 
'56240','88190','50308','26859','64457', + '89091','82136','62377','36233','63837', + '58078','17043','30010','60099','28810', + '98025','29178','87343','73273','30469', + '64034','39516','86057','21309','90257', + '67875','40162','11356','73650','61810', + '72013','30431','22461','19512','13375', + '55307','30625','83849','68908','26689', + '96451','38193','46820','88885','84935', + '69035','83144','47537','56616','94983', + '48033','69952','25486','61547','27385', + '61860','58048','56910','16807','17871', + '35258','31387','35458','35576') + intersect + select ca_zip + from (SELECT substr(ca_zip,1,5) ca_zip,count(*) cnt + FROM customer_address, customer + WHERE ca_address_sk = c_current_addr_sk and + c_preferred_cust_flag='Y' + group by ca_zip + having count(*) > 10)A1)A2) V1 + where ss_store_sk = s_store_sk + and ss_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 1998 + and (substr(s_zip,1,2) = substr(V1.ca_zip,1,2)) + group by s_store_name + order by s_store_name + limit 100; +-- end query 8 in stream 0 using template query8.tpl diff --git a/tests/tpcdsbench/src/queries/query09.sql b/tests/tpcdsbench/src/queries/query09.sql new file mode 100644 index 000000000..81de9e49b --- /dev/null +++ b/tests/tpcdsbench/src/queries/query09.sql @@ -0,0 +1,50 @@ +-- start query 9 in stream 0 using template query9.tpl and seed QUALIFICATION +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 74129 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_paid) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 122840 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_paid) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 56580 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_paid) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10097 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_paid) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 165306 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_paid) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +; +-- end query 9 in stream 0 using template query9.tpl diff --git a/tests/tpcdsbench/src/queries/query10.sql b/tests/tpcdsbench/src/queries/query10.sql new file mode 100644 index 000000000..d07374659 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query10.sql @@ -0,0 +1,58 @@ +-- start query 10 in stream 0 using template query10.tpl and seed QUALIFICATION + select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 + from + customer 
c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_county in ('Rush County','Toole County','Jefferson County','Dona Ana County','La Porte County') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 1 and 1+3) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 1 ANd 1+3) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_moy between 1 and 1+3)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100; +-- end query 10 in stream 0 using template query10.tpl diff --git a/tests/tpcdsbench/src/queries/query11.sql b/tests/tpcdsbench/src/queries/query11.sql new file mode 100644 index 000000000..3d0f458d4 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query11.sql @@ -0,0 +1,80 @@ +-- start query 11 in stream 0 using template query11.tpl and seed QUALIFICATION + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ss_ext_list_price-ss_ext_discount_amt) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,c_preferred_cust_flag customer_preferred_cust_flag + ,c_birth_country customer_birth_country + ,c_login customer_login + ,c_email_address customer_email_address + ,d_year dyear + ,sum(ws_ext_list_price-ws_ext_discount_amt) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + group by c_customer_id + ,c_first_name + ,c_last_name + ,c_preferred_cust_flag + ,c_birth_country + ,c_login + ,c_email_address + ,d_year + ) + select + t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.dyear = 2001 + and t_s_secyear.dyear = 2001+1 + and t_w_firstyear.dyear = 2001 + and t_w_secyear.dyear = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + 
and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else 0.0 end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else 0.0 end + order by t_s_secyear.customer_id + ,t_s_secyear.customer_first_name + ,t_s_secyear.customer_last_name + ,t_s_secyear.customer_preferred_cust_flag + limit 100; +-- end query 11 in stream 0 using template query11.tpl diff --git a/tests/tpcdsbench/src/queries/query12.sql b/tests/tpcdsbench/src/queries/query12.sql new file mode 100644 index 000000000..c88d4d11b --- /dev/null +++ b/tests/tpcdsbench/src/queries/query12.sql @@ -0,0 +1,33 @@ +-- start query 12 in stream 0 using template query12.tpl and seed QUALIFICATION + select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ws_ext_sales_price) as itemrevenue + ,sum(ws_ext_sales_price)*100/sum(sum(ws_ext_sales_price)) over + (partition by i_class) as revenueratio +from + web_sales + ,item + ,date_dim +where + ws_item_sk = i_item_sk + and i_category in ('Sports', 'Books', 'Home') + and ws_sold_date_sk = d_date_sk + and d_date between cast('1999-02-22' as date) + and date_add(cast('1999-02-22' as date), 30 ) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio + limit 100; +-- end query 12 in stream 0 using template query12.tpl diff --git a/tests/tpcdsbench/src/queries/query13.sql b/tests/tpcdsbench/src/queries/query13.sql new file mode 100644 index 000000000..cb183b574 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query13.sql @@ -0,0 +1,51 @@ +-- start query 13 in stream 0 using template query13.tpl and seed QUALIFICATION + select avg(ss_quantity) + ,avg(ss_ext_sales_price) + ,avg(ss_ext_wholesale_cost) + ,sum(ss_ext_wholesale_cost) + from store_sales + ,store + ,customer_demographics + ,household_demographics + ,customer_address + ,date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2001 + and((ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'M' + and cd_education_status = 'Advanced Degree' + and ss_sales_price between 100.00 and 150.00 + and hd_dep_count = 3 + )or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'S' + and cd_education_status = 'College' + and ss_sales_price between 50.00 and 100.00 + and hd_dep_count = 1 + ) or + (ss_hdemo_sk=hd_demo_sk + and cd_demo_sk = ss_cdemo_sk + and cd_marital_status = 'W' + and cd_education_status = '2 yr Degree' + and ss_sales_price between 150.00 and 200.00 + and hd_dep_count = 1 + )) + and((ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('TX', 'OH', 'TX') + and ss_net_profit between 100 and 200 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('OR', 'NM', 'KY') + and ss_net_profit between 150 and 300 + ) or + (ss_addr_sk = ca_address_sk + and ca_country = 'United States' + and ca_state in ('VA', 'TX', 'MS') + and ss_net_profit between 50 and 250 + )) +; +-- end query 13 in stream 0 using template query13.tpl diff --git a/tests/tpcdsbench/src/queries/query14.sql b/tests/tpcdsbench/src/queries/query14.sql new file mode 100644 index 000000000..5b1de5b85 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query14.sql @@ -0,0 +1,197 @@ +-- start query 14 in stream 0 using template query14.tpl and seed QUALIFICATION +with cross_items as + (select i_item_sk ss_item_sk + from item, + 
(select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as + (select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select channel, i_brand_id,i_class_id,i_category_id,sum(sales), sum(number_sales) + from( + select 'store' channel, i_brand_id,i_class_id + ,i_category_id,sum(ss_quantity*ss_list_price) sales + , count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales) + union all + select 'catalog' channel, i_brand_id,i_class_id,i_category_id, sum(cs_quantity*cs_list_price) sales, count(*) number_sales + from catalog_sales + ,item + ,date_dim + where cs_item_sk in (select ss_item_sk from cross_items) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(cs_quantity*cs_list_price) > (select average_sales from avg_sales) + union all + select 'web' channel, i_brand_id,i_class_id,i_category_id, sum(ws_quantity*ws_list_price) sales , count(*) number_sales + from web_sales + ,item + ,date_dim + where ws_item_sk in (select ss_item_sk from cross_items) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1999+2 + and d_moy = 11 + group by i_brand_id,i_class_id,i_category_id + having sum(ws_quantity*ws_list_price) > (select average_sales from avg_sales) + ) y + group by rollup (channel, i_brand_id,i_class_id,i_category_id) + order by channel,i_brand_id,i_class_id,i_category_id + limit 100; + with cross_items as + (select i_item_sk ss_item_sk + from item, + (select iss.i_brand_id brand_id + ,iss.i_class_id class_id + ,iss.i_category_id category_id + from store_sales + ,item iss + ,date_dim d1 + where ss_item_sk = iss.i_item_sk + and ss_sold_date_sk = d1.d_date_sk + and d1.d_year between 1999 AND 1999 + 2 + intersect + select ics.i_brand_id + ,ics.i_class_id + ,ics.i_category_id + from catalog_sales + ,item ics + ,date_dim d2 + where cs_item_sk = ics.i_item_sk + and cs_sold_date_sk = d2.d_date_sk + and d2.d_year 
between 1999 AND 1999 + 2 + intersect + select iws.i_brand_id + ,iws.i_class_id + ,iws.i_category_id + from web_sales + ,item iws + ,date_dim d3 + where ws_item_sk = iws.i_item_sk + and ws_sold_date_sk = d3.d_date_sk + and d3.d_year between 1999 AND 1999 + 2) x + where i_brand_id = brand_id + and i_class_id = class_id + and i_category_id = category_id +), + avg_sales as +(select avg(quantity*list_price) average_sales + from (select ss_quantity quantity + ,ss_list_price list_price + from store_sales + ,date_dim + where ss_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select cs_quantity quantity + ,cs_list_price list_price + from catalog_sales + ,date_dim + where cs_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2 + union all + select ws_quantity quantity + ,ws_list_price list_price + from web_sales + ,date_dim + where ws_sold_date_sk = d_date_sk + and d_year between 1999 and 1999 + 2) x) + select * from + (select 'store' channel, i_brand_id,i_class_id,i_category_id + ,sum(ss_quantity*ss_list_price) sales, count(*) number_sales + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1999 + 1 + and d_moy = 12 + and d_dom = 11) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) this_year, + (select 'store' channel_2, i_brand_id as i_brand_id_2, i_class_id as i_class_id_2 + ,i_category_id as i_category_id_2 , sum(ss_quantity*ss_list_price) sales_2, count(*) number_sales_2 + from store_sales + ,item + ,date_dim + where ss_item_sk in (select ss_item_sk from cross_items) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_week_seq = (select d_week_seq + from date_dim + where d_year = 1999 + and d_moy = 12 + and d_dom = 11) + group by i_brand_id,i_class_id,i_category_id + having sum(ss_quantity*ss_list_price) > (select average_sales from avg_sales)) last_year + where this_year.i_brand_id= last_year.i_brand_id_2 + and this_year.i_class_id = last_year.i_class_id_2 + and this_year.i_category_id = last_year.i_category_id_2 + order by this_year.channel, this_year.i_brand_id, this_year.i_class_id, this_year.i_category_id + limit 100; +-- end query 14 in stream 0 using template query14.tpl diff --git a/tests/tpcdsbench/src/queries/query15.sql b/tests/tpcdsbench/src/queries/query15.sql new file mode 100644 index 000000000..3bd2cc23e --- /dev/null +++ b/tests/tpcdsbench/src/queries/query15.sql @@ -0,0 +1,19 @@ +-- start query 15 in stream 0 using template query15.tpl and seed QUALIFICATION + select ca_zip + ,sum(cs_sales_price) + from catalog_sales + ,customer + ,customer_address + ,date_dim + where cs_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', + '85392', '85460', '80348', '81792') + or ca_state in ('CA','WA','GA') + or cs_sales_price > 500) + and cs_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2001 + group by ca_zip + order by ca_zip + limit 100; +-- end query 15 in stream 0 using template query15.tpl diff --git a/tests/tpcdsbench/src/queries/query16.sql b/tests/tpcdsbench/src/queries/query16.sql new file mode 100644 index 000000000..2afd21025 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query16.sql @@ -0,0 +1,30 @@ +-- start query 16 in stream 0 using template query16.tpl and 
seed QUALIFICATION + select + count(distinct cs_order_number) as order_count + ,sum(cs_ext_ship_cost) as total_shipping_cost + ,sum(cs_net_profit) as total_net_profit +from + catalog_sales cs1 + ,date_dim + ,customer_address + ,call_center +where + d_date between cast('2002-2-01' as date) and + date_add(cast('2002-2-01' as date), 60 ) +and cs1.cs_ship_date_sk = d_date_sk +and cs1.cs_ship_addr_sk = ca_address_sk +and ca_state = 'GA' +and cs1.cs_call_center_sk = cc_call_center_sk +and cc_county in ('Williamson County','Williamson County','Williamson County','Williamson County', + 'Williamson County' +) +and exists (select * + from catalog_sales cs2 + where cs1.cs_order_number = cs2.cs_order_number + and cs1.cs_warehouse_sk <> cs2.cs_warehouse_sk) +and not exists(select * + from catalog_returns cr1 + where cs1.cs_order_number = cr1.cr_order_number) +order by count(distinct cs_order_number) + limit 100; +-- end query 16 in stream 0 using template query16.tpl diff --git a/tests/tpcdsbench/src/queries/query17.sql b/tests/tpcdsbench/src/queries/query17.sql new file mode 100644 index 000000000..d5e59e39c --- /dev/null +++ b/tests/tpcdsbench/src/queries/query17.sql @@ -0,0 +1,44 @@ +-- start query 17 in stream 0 using template query17.tpl and seed QUALIFICATION + select i_item_id + ,i_item_desc + ,s_state + ,count(ss_quantity) as store_sales_quantitycount + ,avg(ss_quantity) as store_sales_quantityave + ,stddev_samp(ss_quantity) as store_sales_quantitystdev + ,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov + ,count(sr_return_quantity) as store_returns_quantitycount + ,avg(sr_return_quantity) as store_returns_quantityave + ,stddev_samp(sr_return_quantity) as store_returns_quantitystdev + ,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as store_returns_quantitycov + ,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) as catalog_sales_quantityave + ,stddev_samp(cs_quantity) as catalog_sales_quantitystdev + ,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov + from store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where d1.d_quarter_name = '2001Q1' + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_quarter_name in ('2001Q1','2001Q2','2001Q3') + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_quarter_name in ('2001Q1','2001Q2','2001Q3') + group by i_item_id + ,i_item_desc + ,s_state + order by i_item_id + ,i_item_desc + ,s_state + limit 100; +-- end query 17 in stream 0 using template query17.tpl diff --git a/tests/tpcdsbench/src/queries/query18.sql b/tests/tpcdsbench/src/queries/query18.sql new file mode 100644 index 000000000..54b477803 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query18.sql @@ -0,0 +1,33 @@ +-- start query 18 in stream 0 using template query18.tpl and seed QUALIFICATION + select i_item_id, + ca_country, + ca_state, + ca_county, + avg( cast(cs_quantity as decimal(12,2))) agg1, + avg( cast(cs_list_price as decimal(12,2))) agg2, + avg( cast(cs_coupon_amt as decimal(12,2))) agg3, + avg( cast(cs_sales_price as decimal(12,2))) agg4, + avg( cast(cs_net_profit as decimal(12,2))) agg5, + avg( cast(c_birth_year as decimal(12,2))) agg6, + avg( cast(cd1.cd_dep_count as decimal(12,2))) agg7 + 
from catalog_sales, customer_demographics cd1, + customer_demographics cd2, customer, customer_address, date_dim, item + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd1.cd_demo_sk and + cs_bill_customer_sk = c_customer_sk and + cd1.cd_gender = 'F' and + cd1.cd_education_status = 'Unknown' and + c_current_cdemo_sk = cd2.cd_demo_sk and + c_current_addr_sk = ca_address_sk and + c_birth_month in (1,6,8,9,12,2) and + d_year = 1998 and + ca_state in ('MS','IN','ND' + ,'OK','NM','VA','MS') + group by rollup (i_item_id, ca_country, ca_state, ca_county) + order by ca_country, + ca_state, + ca_county, + i_item_id + limit 100; +-- end query 18 in stream 0 using template query18.tpl diff --git a/tests/tpcdsbench/src/queries/query19.sql b/tests/tpcdsbench/src/queries/query19.sql new file mode 100644 index 000000000..6cd93d46a --- /dev/null +++ b/tests/tpcdsbench/src/queries/query19.sql @@ -0,0 +1,24 @@ +-- start query 19 in stream 0 using template query19.tpl and seed QUALIFICATION + select i_brand_id brand_id, i_brand brand, i_manufact_id, i_manufact, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item,customer,customer_address,store + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=8 + and d_moy=11 + and d_year=1998 + and ss_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and substr(ca_zip,1,5) <> substr(s_zip,1,5) + and ss_store_sk = s_store_sk + group by i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + order by ext_price desc + ,i_brand + ,i_brand_id + ,i_manufact_id + ,i_manufact + limit 100 ; +-- end query 19 in stream 0 using template query19.tpl diff --git a/tests/tpcdsbench/src/queries/query20.sql b/tests/tpcdsbench/src/queries/query20.sql new file mode 100644 index 000000000..f197f2e85 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query20.sql @@ -0,0 +1,29 @@ +-- start query 20 in stream 0 using template query20.tpl and seed QUALIFICATION + select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(cs_ext_sales_price) as itemrevenue + ,sum(cs_ext_sales_price)*100/sum(sum(cs_ext_sales_price)) over + (partition by i_class) as revenueratio + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and i_category in ('Sports', 'Books', 'Home') + and cs_sold_date_sk = d_date_sk + and d_date between cast('1999-02-22' as date) + and date_add(cast('1999-02-22' as date), 30) + group by i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + order by i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio + limit 100; +-- end query 20 in stream 0 using template query20.tpl diff --git a/tests/tpcdsbench/src/queries/query21.sql b/tests/tpcdsbench/src/queries/query21.sql new file mode 100644 index 000000000..67bdca72b --- /dev/null +++ b/tests/tpcdsbench/src/queries/query21.sql @@ -0,0 +1,29 @@ +-- start query 21 in stream 0 using template query21.tpl and seed QUALIFICATION + select * + from(select w_warehouse_name + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2000-03-11' as date)) + then inv_quantity_on_hand + else 0 end) as inv_before + ,sum(case when (cast(d_date as date) >= cast ('2000-03-11' as date)) + then inv_quantity_on_hand + else 0 end) as inv_after + from inventory + ,warehouse + ,item + ,date_dim + where i_current_price between 0.99 and 1.49 + and i_item_sk = inv_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_date between date_sub(cast ('2000-03-11' 
as date), 30 ) + and date_add(cast ('2000-03-11' as date), 30 ) + group by w_warehouse_name, i_item_id) x + where (case when inv_before > 0 + then inv_after / inv_before + else null + end) between 2.0/3.0 and 3.0/2.0 + order by w_warehouse_name + ,i_item_id + limit 100; +-- end query 21 in stream 0 using template query21.tpl diff --git a/tests/tpcdsbench/src/queries/query22.sql b/tests/tpcdsbench/src/queries/query22.sql new file mode 100644 index 000000000..b3a125184 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query22.sql @@ -0,0 +1,19 @@ +-- start query 22 in stream 0 using template query22.tpl and seed QUALIFICATION + select i_product_name + ,i_brand + ,i_class + ,i_category + ,avg(inv_quantity_on_hand) qoh + from inventory + ,date_dim + ,item + where inv_date_sk=d_date_sk + and inv_item_sk=i_item_sk + and d_month_seq between 1200 and 1200 + 11 + group by rollup(i_product_name + ,i_brand + ,i_class + ,i_category) +order by qoh, i_product_name, i_brand, i_class, i_category + limit 100; +-- end query 22 in stream 0 using template query22.tpl diff --git a/tests/tpcdsbench/src/queries/query23.sql b/tests/tpcdsbench/src/queries/query23.sql new file mode 100644 index 000000000..043f4c282 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query23.sql @@ -0,0 +1,106 @@ +-- start query 23 in stream 0 using template query23.tpl and seed QUALIFICATION + with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (50/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 2 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 2 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) + limit 100; + with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000 + 1,2000 + 2,2000 + 3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by 
c_customer_sk)), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (50/100.0) * (select + * + from max_store_sales)) + select c_last_name,c_first_name,sales + from (select c_last_name,c_first_name,sum(cs_quantity*cs_list_price) sales + from catalog_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 2 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and cs_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name + union all + select c_last_name,c_first_name,sum(ws_quantity*ws_list_price) sales + from web_sales + ,customer + ,date_dim + where d_year = 2000 + and d_moy = 2 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer) + and ws_bill_customer_sk = c_customer_sk + group by c_last_name,c_first_name) + order by c_last_name,c_first_name,sales + limit 100; +-- end query 23 in stream 0 using template query23.tpl diff --git a/tests/tpcdsbench/src/queries/query24.sql b/tests/tpcdsbench/src/queries/query24.sql new file mode 100644 index 000000000..a67136ce1 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query24.sql @@ -0,0 +1,106 @@ +-- start query 24 in stream 0 using template query24.tpl and seed QUALIFICATION +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip +and s_market_id=8 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'pale' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name +; +with ssales as +(select c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size + ,sum(ss_net_paid) netpaid +from store_sales + ,store_returns + ,store + ,item + ,customer + ,customer_address +where ss_ticket_number = sr_ticket_number + and ss_item_sk = sr_item_sk + and ss_customer_sk = c_customer_sk + and ss_item_sk = i_item_sk + and ss_store_sk = s_store_sk + and c_current_addr_sk = ca_address_sk + and c_birth_country <> upper(ca_country) + and s_zip = ca_zip + and s_market_id = 8 +group by c_last_name + ,c_first_name + ,s_store_name + ,ca_state + ,s_state + ,i_color + ,i_current_price + ,i_manager_id + ,i_units + ,i_size) +select c_last_name + ,c_first_name + ,s_store_name + ,sum(netpaid) paid +from ssales +where i_color = 'chiffon' +group by c_last_name + ,c_first_name + ,s_store_name +having sum(netpaid) > (select 0.05*avg(netpaid) + from ssales) +order by c_last_name + ,c_first_name + ,s_store_name +; 
+-- end query 24 in stream 0 using template query24.tpl diff --git a/tests/tpcdsbench/src/queries/query25.sql b/tests/tpcdsbench/src/queries/query25.sql new file mode 100644 index 000000000..4607aa0c8 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query25.sql @@ -0,0 +1,47 @@ +-- start query 25 in stream 0 using template query25.tpl and seed QUALIFICATION + select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_net_profit) as store_sales_profit + ,sum(sr_net_loss) as store_returns_loss + ,sum(cs_net_profit) as catalog_sales_profit + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 4 + and d1.d_year = 2001 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 4 and 10 + and d2.d_year = 2001 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_moy between 4 and 10 + and d3.d_year = 2001 + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100; +-- end query 25 in stream 0 using template query25.tpl diff --git a/tests/tpcdsbench/src/queries/query26.sql b/tests/tpcdsbench/src/queries/query26.sql new file mode 100644 index 000000000..76d1a723b --- /dev/null +++ b/tests/tpcdsbench/src/queries/query26.sql @@ -0,0 +1,20 @@ +-- start query 26 in stream 0 using template query26.tpl and seed QUALIFICATION + select i_item_id, + avg(cs_quantity) agg1, + avg(cs_list_price) agg2, + avg(cs_coupon_amt) agg3, + avg(cs_sales_price) agg4 + from catalog_sales, customer_demographics, date_dim, item, promotion + where cs_sold_date_sk = d_date_sk and + cs_item_sk = i_item_sk and + cs_bill_cdemo_sk = cd_demo_sk and + cs_promo_sk = p_promo_sk and + cd_gender = 'M' and + cd_marital_status = 'S' and + cd_education_status = 'College' and + (p_channel_email = 'N' or p_channel_event = 'N') and + d_year = 2000 + group by i_item_id + order by i_item_id + limit 100; +-- end query 26 in stream 0 using template query26.tpl diff --git a/tests/tpcdsbench/src/queries/query27.sql b/tests/tpcdsbench/src/queries/query27.sql new file mode 100644 index 000000000..5b49c23e3 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query27.sql @@ -0,0 +1,22 @@ +-- start query 27 in stream 0 using template query27.tpl and seed QUALIFICATION + select i_item_id, + s_state, grouping(s_state) g_state, + avg(ss_quantity) agg1, + avg(ss_list_price) agg2, + avg(ss_coupon_amt) agg3, + avg(ss_sales_price) agg4 + from store_sales, customer_demographics, date_dim, store, item + where ss_sold_date_sk = d_date_sk and + ss_item_sk = i_item_sk and + ss_store_sk = s_store_sk and + ss_cdemo_sk = cd_demo_sk and + cd_gender = 'M' and + cd_marital_status = 'S' and + cd_education_status = 'College' and + d_year = 2002 and + s_state in ('TN','TN', 'TN', 'TN', 'TN', 'TN') + group by rollup (i_item_id, s_state) + order by i_item_id + ,s_state + limit 100; +-- end query 27 in stream 0 using template query27.tpl diff --git a/tests/tpcdsbench/src/queries/query28.sql b/tests/tpcdsbench/src/queries/query28.sql new file mode 100644 index 000000000..3e4e4bef3 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query28.sql @@ -0,0 +1,52 @@ +-- start query 28 in stream 0 using template query28.tpl and 
seed QUALIFICATION + select * +from (select avg(ss_list_price) B1_LP + ,count(ss_list_price) B1_CNT + ,count(distinct ss_list_price) B1_CNTD + from store_sales + where ss_quantity between 0 and 5 + and (ss_list_price between 8 and 8+10 + or ss_coupon_amt between 459 and 459+1000 + or ss_wholesale_cost between 57 and 57+20)) B1, + (select avg(ss_list_price) B2_LP + ,count(ss_list_price) B2_CNT + ,count(distinct ss_list_price) B2_CNTD + from store_sales + where ss_quantity between 6 and 10 + and (ss_list_price between 90 and 90+10 + or ss_coupon_amt between 2323 and 2323+1000 + or ss_wholesale_cost between 31 and 31+20)) B2, + (select avg(ss_list_price) B3_LP + ,count(ss_list_price) B3_CNT + ,count(distinct ss_list_price) B3_CNTD + from store_sales + where ss_quantity between 11 and 15 + and (ss_list_price between 142 and 142+10 + or ss_coupon_amt between 12214 and 12214+1000 + or ss_wholesale_cost between 79 and 79+20)) B3, + (select avg(ss_list_price) B4_LP + ,count(ss_list_price) B4_CNT + ,count(distinct ss_list_price) B4_CNTD + from store_sales + where ss_quantity between 16 and 20 + and (ss_list_price between 135 and 135+10 + or ss_coupon_amt between 6071 and 6071+1000 + or ss_wholesale_cost between 38 and 38+20)) B4, + (select avg(ss_list_price) B5_LP + ,count(ss_list_price) B5_CNT + ,count(distinct ss_list_price) B5_CNTD + from store_sales + where ss_quantity between 21 and 25 + and (ss_list_price between 122 and 122+10 + or ss_coupon_amt between 836 and 836+1000 + or ss_wholesale_cost between 17 and 17+20)) B5, + (select avg(ss_list_price) B6_LP + ,count(ss_list_price) B6_CNT + ,count(distinct ss_list_price) B6_CNTD + from store_sales + where ss_quantity between 26 and 30 + and (ss_list_price between 154 and 154+10 + or ss_coupon_amt between 7326 and 7326+1000 + or ss_wholesale_cost between 7 and 7+20)) B6 + limit 100; +-- end query 28 in stream 0 using template query28.tpl diff --git a/tests/tpcdsbench/src/queries/query29.sql b/tests/tpcdsbench/src/queries/query29.sql new file mode 100644 index 000000000..f3ae0d7e9 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query29.sql @@ -0,0 +1,46 @@ +-- start query 29 in stream 0 using template query29.tpl and seed QUALIFICATION + select + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + ,sum(ss_quantity) as store_sales_quantity + ,sum(sr_return_quantity) as store_returns_quantity + ,sum(cs_quantity) as catalog_sales_quantity + from + store_sales + ,store_returns + ,catalog_sales + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,item + where + d1.d_moy = 9 + and d1.d_year = 1999 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and ss_customer_sk = sr_customer_sk + and ss_item_sk = sr_item_sk + and ss_ticket_number = sr_ticket_number + and sr_returned_date_sk = d2.d_date_sk + and d2.d_moy between 9 and 9 + 3 + and d2.d_year = 1999 + and sr_customer_sk = cs_bill_customer_sk + and sr_item_sk = cs_item_sk + and cs_sold_date_sk = d3.d_date_sk + and d3.d_year in (1999,1999+1,1999+2) + group by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + order by + i_item_id + ,i_item_desc + ,s_store_id + ,s_store_name + limit 100; +-- end query 29 in stream 0 using template query29.tpl diff --git a/tests/tpcdsbench/src/queries/query30.sql b/tests/tpcdsbench/src/queries/query30.sql new file mode 100644 index 000000000..a13deccbb --- /dev/null +++ b/tests/tpcdsbench/src/queries/query30.sql @@ -0,0 +1,30 @@ +-- start query 30 in stream 0 using template query30.tpl and seed QUALIFICATION 
+ with customer_total_return as + (select wr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(wr_return_amt) as ctr_total_return + from web_returns + ,date_dim + ,customer_address + where wr_returned_date_sk = d_date_sk + and d_year =2002 + and wr_returning_addr_sk = ca_address_sk + group by wr_returning_customer_sk + ,ca_state) + select c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'GA' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,c_preferred_cust_flag + ,c_birth_day,c_birth_month,c_birth_year,c_birth_country,c_login,c_email_address + ,c_last_review_date,ctr_total_return + limit 100; +-- end query 30 in stream 0 using template query30.tpl diff --git a/tests/tpcdsbench/src/queries/query31.sql b/tests/tpcdsbench/src/queries/query31.sql new file mode 100644 index 000000000..4bd09d830 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query31.sql @@ -0,0 +1,51 @@ +-- start query 31 in stream 0 using template query31.tpl and seed QUALIFICATION + with ss as + (select ca_county,d_qoy, d_year,sum(ss_ext_sales_price) as store_sales + from store_sales,date_dim,customer_address + where ss_sold_date_sk = d_date_sk + and ss_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year), + ws as + (select ca_county,d_qoy, d_year,sum(ws_ext_sales_price) as web_sales + from web_sales,date_dim,customer_address + where ws_sold_date_sk = d_date_sk + and ws_bill_addr_sk=ca_address_sk + group by ca_county,d_qoy, d_year) + select + ss1.ca_county + ,ss1.d_year + ,ws2.web_sales/ws1.web_sales web_q1_q2_increase + ,ss2.store_sales/ss1.store_sales store_q1_q2_increase + ,ws3.web_sales/ws2.web_sales web_q2_q3_increase + ,ss3.store_sales/ss2.store_sales store_q2_q3_increase + from + ss ss1 + ,ss ss2 + ,ss ss3 + ,ws ws1 + ,ws ws2 + ,ws ws3 + where + ss1.d_qoy = 1 + and ss1.d_year = 2000 + and ss1.ca_county = ss2.ca_county + and ss2.d_qoy = 2 + and ss2.d_year = 2000 + and ss2.ca_county = ss3.ca_county + and ss3.d_qoy = 3 + and ss3.d_year = 2000 + and ss1.ca_county = ws1.ca_county + and ws1.d_qoy = 1 + and ws1.d_year = 2000 + and ws1.ca_county = ws2.ca_county + and ws2.d_qoy = 2 + and ws2.d_year = 2000 + and ws1.ca_county = ws3.ca_county + and ws3.d_qoy = 3 + and ws3.d_year =2000 + and case when ws1.web_sales > 0 then ws2.web_sales/ws1.web_sales else null end + > case when ss1.store_sales > 0 then ss2.store_sales/ss1.store_sales else null end + and case when ws2.web_sales > 0 then ws3.web_sales/ws2.web_sales else null end + > case when ss2.store_sales > 0 then ss3.store_sales/ss2.store_sales else null end + order by ss1.ca_county; +-- end query 31 in stream 0 using template query31.tpl diff --git a/tests/tpcdsbench/src/queries/query32.sql b/tests/tpcdsbench/src/queries/query32.sql new file mode 100644 index 000000000..af775fb91 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query32.sql @@ -0,0 +1,27 @@ +-- start query 32 in stream 0 using template query32.tpl and seed QUALIFICATION +select sum(cs_ext_discount_amt) as excess_discount_amount +from + catalog_sales + ,item + ,date_dim +where +i_manufact_id = 977 +and i_item_sk = cs_item_sk 
+and d_date between '2000-01-27' and + date_add(cast('2000-01-27' as date), 90 ) +and d_date_sk = cs_sold_date_sk +and cs_ext_discount_amt + > ( + select + 1.3 * avg(cs_ext_discount_amt) + from + catalog_sales + ,date_dim + where + cs_item_sk = i_item_sk + and d_date between '2000-01-27' and + date_add(cast('2000-01-27' as date), 90 ) + and d_date_sk = cs_sold_date_sk + ) + limit 100; +-- end query 32 in stream 0 using template query32.tpl diff --git a/tests/tpcdsbench/src/queries/query33.sql b/tests/tpcdsbench/src/queries/query33.sql new file mode 100644 index 000000000..bfd564bd2 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query33.sql @@ -0,0 +1,74 @@ +-- start query 33 in stream 0 using template query33.tpl and seed QUALIFICATION + with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_manufact_id + order by total_sales + limit 100; +-- end query 33 in stream 0 using template query33.tpl diff --git a/tests/tpcdsbench/src/queries/query34.sql b/tests/tpcdsbench/src/queries/query34.sql new file mode 100644 index 000000000..d3dab595d --- /dev/null +++ b/tests/tpcdsbench/src/queries/query34.sql @@ -0,0 +1,30 @@ +-- start query 34 in stream 0 using template query34.tpl and seed QUALIFICATION + select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag + ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (date_dim.d_dom between 1 and 3 or date_dim.d_dom between 25 and 28) + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and (case when household_demographics.hd_vehicle_count > 0 + then household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count + else null + end) > 1.2 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Williamson County','Williamson County','Williamson County','Williamson County', + 
'Williamson County','Williamson County','Williamson County','Williamson County') + group by ss_ticket_number,ss_customer_sk) dn,customer + where ss_customer_sk = c_customer_sk + and cnt between 15 and 20 + order by c_last_name,c_first_name,c_salutation,c_preferred_cust_flag desc, ss_ticket_number; +-- end query 34 in stream 0 using template query34.tpl diff --git a/tests/tpcdsbench/src/queries/query35.sql b/tests/tpcdsbench/src/queries/query35.sql new file mode 100644 index 000000000..56246f25b --- /dev/null +++ b/tests/tpcdsbench/src/queries/query35.sql @@ -0,0 +1,57 @@ +-- start query 35 in stream 0 using template query35.tpl and seed QUALIFICATION + select + ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + count(*) cnt1, + min(cd_dep_count), + max(cd_dep_count), + avg(cd_dep_count), + cd_dep_employed_count, + count(*) cnt2, + min(cd_dep_employed_count), + max(cd_dep_employed_count), + avg(cd_dep_employed_count), + cd_dep_college_count, + count(*) cnt3, + min(cd_dep_college_count), + max(cd_dep_college_count), + avg(cd_dep_college_count) + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4) and + (exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4) or + exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2002 and + d_qoy < 4)) + group by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + order by ca_state, + cd_gender, + cd_marital_status, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count + limit 100; +-- end query 35 in stream 0 using template query35.tpl diff --git a/tests/tpcdsbench/src/queries/query36.sql b/tests/tpcdsbench/src/queries/query36.sql new file mode 100644 index 000000000..13775d9eb --- /dev/null +++ b/tests/tpcdsbench/src/queries/query36.sql @@ -0,0 +1,29 @@ +-- start query 36 in stream 0 using template query36.tpl and seed QUALIFICATION + select + sum(ss_net_profit)/sum(ss_ext_sales_price) as gross_margin + ,i_category + ,i_class + ,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ss_net_profit)/sum(ss_ext_sales_price) asc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,item + ,store + where + d1.d_year = 2001 + and d1.d_date_sk = ss_sold_date_sk + and i_item_sk = ss_item_sk + and s_store_sk = ss_store_sk + and s_state in ('TN','TN','TN','TN', + 'TN','TN','TN','TN') + group by rollup(i_category,i_class) + order by + lochierarchy desc + ,case when lochierarchy = 0 then i_category end + ,rank_within_parent + limit 100; +-- end query 36 in stream 0 using template query36.tpl diff --git a/tests/tpcdsbench/src/queries/query37.sql b/tests/tpcdsbench/src/queries/query37.sql new file mode 100644 index 000000000..8ddbadc22 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query37.sql @@ -0,0 +1,16 @@ +-- start query 37 in stream 0 using template query37.tpl and seed QUALIFICATION + select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, catalog_sales + where 
i_current_price between 68 and 68 + 30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2000-02-01' as date) and date_add(cast('2000-02-01' as date), 60 ) + and i_manufact_id in (677,940,694,808) + and inv_quantity_on_hand between 100 and 500 + and cs_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100; +-- end query 37 in stream 0 using template query37.tpl diff --git a/tests/tpcdsbench/src/queries/query38.sql b/tests/tpcdsbench/src/queries/query38.sql new file mode 100644 index 000000000..9ac3f0237 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query38.sql @@ -0,0 +1,22 @@ +-- start query 38 in stream 0 using template query38.tpl and seed QUALIFICATION + select count(*) from ( + select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1200 and 1200 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1200 and 1200 + 11 + intersect + select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1200 and 1200 + 11 +) hot_cust + limit 100; +-- end query 38 in stream 0 using template query38.tpl diff --git a/tests/tpcdsbench/src/queries/query39.sql b/tests/tpcdsbench/src/queries/query39.sql new file mode 100644 index 000000000..4ca3b8386 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query39.sql @@ -0,0 +1,53 @@ +-- start query 39 in stream 0 using template query39.tpl and seed QUALIFICATION +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2001 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk as w_warehouse_sk_2,inv2.i_item_sk as i_item_sk_2,inv2.d_moy as d_moy_2,inv2.mean as mean_2, inv2.cov as cov_2 +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=1 + and inv2.d_moy=1+1 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,d_moy_2,mean_2, cov_2 +; +with inv as +(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stdev,mean, case mean when 0 then null else stdev/mean end cov + from(select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy + ,stddev_samp(inv_quantity_on_hand) stdev,avg(inv_quantity_on_hand) mean + from inventory + ,item + ,warehouse + ,date_dim + where inv_item_sk = i_item_sk + and inv_warehouse_sk = w_warehouse_sk + and inv_date_sk = d_date_sk + and d_year =2001 + group by w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy) foo + where case mean when 0 then 0 else stdev/mean end > 1) +select 
inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean, inv1.cov + ,inv2.w_warehouse_sk as w_warehouse_sk_2,inv2.i_item_sk as i_item_sk_2,inv2.d_moy as d_moy_2,inv2.mean as mean_2, inv2.cov as cov_2 +from inv inv1,inv inv2 +where inv1.i_item_sk = inv2.i_item_sk + and inv1.w_warehouse_sk = inv2.w_warehouse_sk + and inv1.d_moy=1 + and inv2.d_moy=1+1 + and inv1.cov > 1.5 +order by inv1.w_warehouse_sk,inv1.i_item_sk,inv1.d_moy,inv1.mean,inv1.cov + ,d_moy_2,mean_2, cov_2 +; +-- end query 39 in stream 0 using template query39.tpl diff --git a/tests/tpcdsbench/src/queries/query40.sql b/tests/tpcdsbench/src/queries/query40.sql new file mode 100644 index 000000000..c53e262ce --- /dev/null +++ b/tests/tpcdsbench/src/queries/query40.sql @@ -0,0 +1,27 @@ +-- start query 40 in stream 0 using template query40.tpl and seed QUALIFICATION + select + w_state + ,i_item_id + ,sum(case when (cast(d_date as date) < cast ('2000-03-11' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_before + ,sum(case when (cast(d_date as date) >= cast ('2000-03-11' as date)) + then cs_sales_price - coalesce(cr_refunded_cash,0) else 0 end) as sales_after + from + catalog_sales left outer join catalog_returns on + (cs_order_number = cr_order_number + and cs_item_sk = cr_item_sk) + ,warehouse + ,item + ,date_dim + where + i_current_price between 0.99 and 1.49 + and i_item_sk = cs_item_sk + and cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and d_date between date_sub(cast ('2000-03-11' as date), 30 ) + and date_add(cast ('2000-03-11' as date), 30 ) + group by + w_state,i_item_id + order by w_state,i_item_id + limit 100; +-- end query 40 in stream 0 using template query40.tpl diff --git a/tests/tpcdsbench/src/queries/query41.sql b/tests/tpcdsbench/src/queries/query41.sql new file mode 100644 index 000000000..2b8095699 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query41.sql @@ -0,0 +1,51 @@ +-- start query 41 in stream 0 using template query41.tpl and seed QUALIFICATION + select distinct(i_product_name) + from item i1 + where i_manufact_id between 738 and 738+40 + and (select count(*) as item_cnt + from item + where (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'powder' or i_color = 'khaki') and + (i_units = 'Ounce' or i_units = 'Oz') and + (i_size = 'medium' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'brown' or i_color = 'honeydew') and + (i_units = 'Bunch' or i_units = 'Ton') and + (i_size = 'N/A' or i_size = 'small') + ) or + (i_category = 'Men' and + (i_color = 'floral' or i_color = 'deep') and + (i_units = 'N/A' or i_units = 'Dozen') and + (i_size = 'petite' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'light' or i_color = 'cornflower') and + (i_units = 'Box' or i_units = 'Pound') and + (i_size = 'medium' or i_size = 'extra large') + ))) or + (i_manufact = i1.i_manufact and + ((i_category = 'Women' and + (i_color = 'midnight' or i_color = 'snow') and + (i_units = 'Pallet' or i_units = 'Gross') and + (i_size = 'medium' or i_size = 'extra large') + ) or + (i_category = 'Women' and + (i_color = 'cyan' or i_color = 'papaya') and + (i_units = 'Cup' or i_units = 'Dram') and + (i_size = 'N/A' or i_size = 'small') + ) or + (i_category = 'Men' and + (i_color = 'orange' or i_color = 'frosted') and + (i_units = 'Each' or i_units = 'Tbl') and + (i_size = 'petite' or i_size = 'large') + ) or + (i_category = 'Men' and + (i_color = 'forest' or i_color = 'ghost') and + (i_units = 'Lb' or i_units = 
'Bundle') and + (i_size = 'medium' or i_size = 'extra large') + )))) > 0 + order by i_product_name + limit 100; +-- end query 41 in stream 0 using template query41.tpl diff --git a/tests/tpcdsbench/src/queries/query42.sql b/tests/tpcdsbench/src/queries/query42.sql new file mode 100644 index 000000000..11722cde0 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query42.sql @@ -0,0 +1,21 @@ +-- start query 42 in stream 0 using template query42.tpl and seed QUALIFICATION + select dt.d_year + ,item.i_category_id + ,item.i_category + ,sum(ss_ext_sales_price) + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=2000 + group by dt.d_year + ,item.i_category_id + ,item.i_category + order by sum(ss_ext_sales_price) desc,dt.d_year + ,item.i_category_id + ,item.i_category + limit 100 ; +-- end query 42 in stream 0 using template query42.tpl diff --git a/tests/tpcdsbench/src/queries/query43.sql b/tests/tpcdsbench/src/queries/query43.sql new file mode 100644 index 000000000..74f47fd23 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query43.sql @@ -0,0 +1,18 @@ +-- start query 43 in stream 0 using template query43.tpl and seed QUALIFICATION + select s_store_name, s_store_id, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when (d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from date_dim, store_sales, store + where d_date_sk = ss_sold_date_sk and + s_store_sk = ss_store_sk and + s_gmt_offset = -5 and + d_year = 2000 + group by s_store_name, s_store_id + order by s_store_name, s_store_id,sun_sales,mon_sales,tue_sales,wed_sales,thu_sales,fri_sales,sat_sales + limit 100; +-- end query 43 in stream 0 using template query43.tpl diff --git a/tests/tpcdsbench/src/queries/query44.sql b/tests/tpcdsbench/src/queries/query44.sql new file mode 100644 index 000000000..4ad10b08f --- /dev/null +++ b/tests/tpcdsbench/src/queries/query44.sql @@ -0,0 +1,34 @@ +-- start query 44 in stream 0 using template query44.tpl and seed QUALIFICATION + select asceding.rnk, i1.i_product_name best_performing, i2.i_product_name worst_performing +from(select * + from (select item_sk,rank() over (order by rank_col asc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_addr_sk is null + group by ss_store_sk))V1)V11 + where rnk < 11) asceding, + (select * + from (select item_sk,rank() over (order by rank_col desc) rnk + from (select ss_item_sk item_sk,avg(ss_net_profit) rank_col + from store_sales ss1 + where ss_store_sk = 4 + group by ss_item_sk + having avg(ss_net_profit) > 0.9*(select avg(ss_net_profit) rank_col + from store_sales + where ss_store_sk = 4 + and ss_addr_sk is null + group by ss_store_sk))V2)V21 + where rnk < 11) descending, +item i1, +item i2 +where asceding.rnk = descending.rnk + and 
i1.i_item_sk=asceding.item_sk + and i2.i_item_sk=descending.item_sk +order by asceding.rnk + limit 100; +-- end query 44 in stream 0 using template query44.tpl diff --git a/tests/tpcdsbench/src/queries/query45.sql b/tests/tpcdsbench/src/queries/query45.sql new file mode 100644 index 000000000..fc1689210 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query45.sql @@ -0,0 +1,19 @@ +-- start query 45 in stream 0 using template query45.tpl and seed QUALIFICATION + select ca_zip, ca_city, sum(ws_sales_price) + from web_sales, customer, customer_address, date_dim, item + where ws_bill_customer_sk = c_customer_sk + and c_current_addr_sk = ca_address_sk + and ws_item_sk = i_item_sk + and ( substr(ca_zip,1,5) in ('85669', '86197','88274','83405','86475', '85392', '85460', '80348', '81792') + or + i_item_id in (select i_item_id + from item + where i_item_sk in (2, 3, 5, 7, 11, 13, 17, 19, 23, 29) + ) + ) + and ws_sold_date_sk = d_date_sk + and d_qoy = 2 and d_year = 2001 + group by ca_zip, ca_city + order by ca_zip, ca_city + limit 100; +-- end query 45 in stream 0 using template query45.tpl diff --git a/tests/tpcdsbench/src/queries/query46.sql b/tests/tpcdsbench/src/queries/query46.sql new file mode 100644 index 000000000..3d3212922 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query46.sql @@ -0,0 +1,34 @@ +-- start query 46 in stream 0 using template query46.tpl and seed QUALIFICATION + select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and (household_demographics.hd_dep_count = 4 or + household_demographics.hd_vehicle_count= 3) + and date_dim.d_dow in (6,0) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Fairview','Midway','Fairview','Fairview','Fairview') + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,ca_city) dn,customer,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + limit 100; +-- end query 46 in stream 0 using template query46.tpl diff --git a/tests/tpcdsbench/src/queries/query47.sql b/tests/tpcdsbench/src/queries/query47.sql new file mode 100644 index 000000000..17c7edd36 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query47.sql @@ -0,0 +1,50 @@ +-- start query 47 in stream 0 using template query47.tpl and seed QUALIFICATION + with v1 as( + select i_category, i_brand, + s_store_name, s_company_name, + d_year, d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, + s_store_name, s_company_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + s_store_name, s_company_name + order by d_year, d_moy) rn + from item, store_sales, date_dim, store + where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + ( + d_year = 1999 or + ( d_year = 1999-1 and d_moy =12) or + ( d_year = 1999+1 and d_moy =1) + ) + group by i_category, i_brand, + s_store_name, 
s_company_name, + d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.s_store_name, v1.s_company_name + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1.s_store_name = v1_lag.s_store_name and + v1.s_store_name = v1_lead.s_store_name and + v1.s_company_name = v1_lag.s_company_name and + v1.s_company_name = v1_lead.s_company_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 1999 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100; +-- end query 47 in stream 0 using template query47.tpl diff --git a/tests/tpcdsbench/src/queries/query48.sql b/tests/tpcdsbench/src/queries/query48.sql new file mode 100644 index 000000000..b443f7407 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query48.sql @@ -0,0 +1,66 @@ +-- start query 48 in stream 0 using template query48.tpl and seed QUALIFICATION + select sum (ss_quantity) + from store_sales, store, customer_demographics, customer_address, date_dim + where s_store_sk = ss_store_sk + and ss_sold_date_sk = d_date_sk and d_year = 2000 + and + ( + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'M' + and + cd_education_status = '4 yr Degree' + and + ss_sales_price between 100.00 and 150.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'D' + and + cd_education_status = '2 yr Degree' + and + ss_sales_price between 50.00 and 100.00 + ) + or + ( + cd_demo_sk = ss_cdemo_sk + and + cd_marital_status = 'S' + and + cd_education_status = 'College' + and + ss_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('CO', 'OH', 'TX') + and ss_net_profit between 0 and 2000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('OR', 'MN', 'KY') + and ss_net_profit between 150 and 3000 + ) + or + (ss_addr_sk = ca_address_sk + and + ca_country = 'United States' + and + ca_state in ('VA', 'CA', 'MS') + and ss_net_profit between 50 and 25000 + ) + ) +; +-- end query 48 in stream 0 using template query48.tpl diff --git a/tests/tpcdsbench/src/queries/query49.sql b/tests/tpcdsbench/src/queries/query49.sql new file mode 100644 index 000000000..4a857e6a0 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query49.sql @@ -0,0 +1,126 @@ +-- start query 49 in stream 0 using template query49.tpl and seed QUALIFICATION + select + 'web' as channel + ,web.item + ,web.return_ratio + ,web.return_rank + ,web.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select ws.ws_item_sk as item + ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ + cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + web_sales ws left outer join web_returns wr + on (ws.ws_order_number = wr.wr_order_number and + ws.ws_item_sk = wr.wr_item_sk) + ,date_dim + where + 
wr.wr_return_amt > 10000 + and ws.ws_net_profit > 1 + and ws.ws_net_paid > 0 + and ws.ws_quantity > 0 + and ws_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 12 + group by ws.ws_item_sk + ) in_web + ) web + where + ( + web.return_rank <= 10 + or + web.currency_rank <= 10 + ) + union + select + 'catalog' as channel + ,catalog.item + ,catalog.return_ratio + ,catalog.return_rank + ,catalog.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select + cs.cs_item_sk as item + ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ + cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + catalog_sales cs left outer join catalog_returns cr + on (cs.cs_order_number = cr.cr_order_number and + cs.cs_item_sk = cr.cr_item_sk) + ,date_dim + where + cr.cr_return_amount > 10000 + and cs.cs_net_profit > 1 + and cs.cs_net_paid > 0 + and cs.cs_quantity > 0 + and cs_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 12 + group by cs.cs_item_sk + ) in_cat + ) catalog + where + ( + catalog.return_rank <= 10 + or + catalog.currency_rank <=10 + ) + union + select + 'store' as channel + ,store.item + ,store.return_ratio + ,store.return_rank + ,store.currency_rank + from ( + select + item + ,return_ratio + ,currency_ratio + ,rank() over (order by return_ratio) as return_rank + ,rank() over (order by currency_ratio) as currency_rank + from + ( select sts.ss_item_sk as item + ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as return_ratio + ,(cast(sum(coalesce(sr.sr_return_amt,0)) as decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as currency_ratio + from + store_sales sts left outer join store_returns sr + on (sts.ss_ticket_number = sr.sr_ticket_number and sts.ss_item_sk = sr.sr_item_sk) + ,date_dim + where + sr.sr_return_amt > 10000 + and sts.ss_net_profit > 1 + and sts.ss_net_paid > 0 + and sts.ss_quantity > 0 + and ss_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 12 + group by sts.ss_item_sk + ) in_store + ) store + where ( + store.return_rank <= 10 + or + store.currency_rank <= 10 + ) + order by 1,4,5 + limit 100; +-- end query 49 in stream 0 using template query49.tpl diff --git a/tests/tpcdsbench/src/queries/query50.sql b/tests/tpcdsbench/src/queries/query50.sql new file mode 100644 index 000000000..94b67a928 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query50.sql @@ -0,0 +1,58 @@ +-- start query 50 in stream 0 using template query50.tpl and seed QUALIFICATION + select + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk <= 30 ) then 1 else 0 end) as 30_days + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 30) and + (sr_returned_date_sk - ss_sold_date_sk <= 60) then 1 else 0 end ) as 31_60_days + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 60) and + (sr_returned_date_sk - ss_sold_date_sk <= 90) then 1 else 0 end) as 61_90_days + ,sum(case when (sr_returned_date_sk - ss_sold_date_sk > 90) and + (sr_returned_date_sk - ss_sold_date_sk <= 120) then 1 else 0 end) as 91_120_days + ,sum(case when 
(sr_returned_date_sk - ss_sold_date_sk > 120) then 1 else 0 end) as above120_days +from + store_sales + ,store_returns + ,store + ,date_dim d1 + ,date_dim d2 +where + d2.d_year = 2001 +and d2.d_moy = 8 +and ss_ticket_number = sr_ticket_number +and ss_item_sk = sr_item_sk +and ss_sold_date_sk = d1.d_date_sk +and sr_returned_date_sk = d2.d_date_sk +and ss_customer_sk = sr_customer_sk +and ss_store_sk = s_store_sk +group by + s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip +order by s_store_name + ,s_company_id + ,s_street_number + ,s_street_name + ,s_street_type + ,s_suite_number + ,s_city + ,s_county + ,s_state + ,s_zip + limit 100; +-- end query 50 in stream 0 using template query50.tpl diff --git a/tests/tpcdsbench/src/queries/query51.sql b/tests/tpcdsbench/src/queries/query51.sql new file mode 100644 index 000000000..448ce5205 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query51.sql @@ -0,0 +1,44 @@ +-- start query 51 in stream 0 using template query51.tpl and seed QUALIFICATION +WITH web_v1 as ( +select + ws_item_sk item_sk, d_date, + sum(sum(ws_sales_price)) + over (partition by ws_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from web_sales + ,date_dim +where ws_sold_date_sk=d_date_sk + and d_month_seq between 1200 and 1200+11 + and ws_item_sk is not NULL +group by ws_item_sk, d_date), +store_v1 as ( +select + ss_item_sk item_sk, d_date, + sum(sum(ss_sales_price)) + over (partition by ss_item_sk order by d_date rows between unbounded preceding and current row) cume_sales +from store_sales + ,date_dim +where ss_sold_date_sk=d_date_sk + and d_month_seq between 1200 and 1200+11 + and ss_item_sk is not NULL +group by ss_item_sk, d_date) + select * +from (select item_sk + ,d_date + ,web_sales + ,store_sales + ,max(web_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) web_cumulative + ,max(store_sales) + over (partition by item_sk order by d_date rows between unbounded preceding and current row) store_cumulative + from (select case when web.item_sk is not null then web.item_sk else store.item_sk end item_sk + ,case when web.d_date is not null then web.d_date else store.d_date end d_date + ,web.cume_sales web_sales + ,store.cume_sales store_sales + from web_v1 web full outer join store_v1 store on (web.item_sk = store.item_sk + and web.d_date = store.d_date) + )x )y +where web_cumulative > store_cumulative +order by item_sk + ,d_date + limit 100; +-- end query 51 in stream 0 using template query51.tpl diff --git a/tests/tpcdsbench/src/queries/query52.sql b/tests/tpcdsbench/src/queries/query52.sql new file mode 100644 index 000000000..7982bcd96 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query52.sql @@ -0,0 +1,21 @@ +-- start query 52 in stream 0 using template query52.tpl and seed QUALIFICATION + select dt.d_year + ,item.i_brand_id brand_id + ,item.i_brand brand + ,sum(ss_ext_sales_price) ext_price + from date_dim dt + ,store_sales + ,item + where dt.d_date_sk = store_sales.ss_sold_date_sk + and store_sales.ss_item_sk = item.i_item_sk + and item.i_manager_id = 1 + and dt.d_moy=11 + and dt.d_year=2000 + group by dt.d_year + ,item.i_brand + ,item.i_brand_id + order by dt.d_year + ,ext_price desc + ,brand_id + limit 100 ; +-- end query 52 in stream 0 using template query52.tpl diff --git a/tests/tpcdsbench/src/queries/query53.sql b/tests/tpcdsbench/src/queries/query53.sql new file mode 100644 index 
000000000..8e1e1b127 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query53.sql @@ -0,0 +1,27 @@ +-- start query 53 in stream 0 using template query53.tpl and seed QUALIFICATION + select * from +(select i_manufact_id, +sum(ss_sales_price) sum_sales, +avg(sum(ss_sales_price)) over (partition by i_manufact_id) avg_quarterly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and +ss_sold_date_sk = d_date_sk and +ss_store_sk = s_store_sk and +d_month_seq in (1200,1200+1,1200+2,1200+3,1200+4,1200+5,1200+6,1200+7,1200+8,1200+9,1200+10,1200+11) and +((i_category in ('Books','Children','Electronics') and +i_class in ('personal','portable','reference','self-help') and +i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) +or(i_category in ('Women','Music','Men') and +i_class in ('accessories','classical','fragrances','pants') and +i_brand in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manufact_id, d_qoy ) tmp1 +where case when avg_quarterly_sales > 0 + then abs (sum_sales - avg_quarterly_sales)/ avg_quarterly_sales + else null end > 0.1 +order by avg_quarterly_sales, + sum_sales, + i_manufact_id + limit 100; +-- end query 53 in stream 0 using template query53.tpl diff --git a/tests/tpcdsbench/src/queries/query54.sql b/tests/tpcdsbench/src/queries/query54.sql new file mode 100644 index 000000000..ed3eaa445 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query54.sql @@ -0,0 +1,55 @@ +-- start query 54 in stream 0 using template query54.tpl and seed QUALIFICATION + with my_customers as ( + select distinct c_customer_sk + , c_current_addr_sk + from + ( select cs_sold_date_sk sold_date_sk, + cs_bill_customer_sk customer_sk, + cs_item_sk item_sk + from catalog_sales + union all + select ws_sold_date_sk sold_date_sk, + ws_bill_customer_sk customer_sk, + ws_item_sk item_sk + from web_sales + ) cs_or_ws_sales, + item, + date_dim, + customer + where sold_date_sk = d_date_sk + and item_sk = i_item_sk + and i_category = 'Women' + and i_class = 'maternity' + and c_customer_sk = cs_or_ws_sales.customer_sk + and d_moy = 12 + and d_year = 1998 + ) + , my_revenue as ( + select c_customer_sk, + sum(ss_ext_sales_price) as revenue + from my_customers, + store_sales, + customer_address, + store, + date_dim + where c_current_addr_sk = ca_address_sk + and ca_county = s_county + and ca_state = s_state + and ss_sold_date_sk = d_date_sk + and c_customer_sk = ss_customer_sk + and d_month_seq between (select distinct d_month_seq+1 + from date_dim where d_year = 1998 and d_moy = 12) + and (select distinct d_month_seq+3 + from date_dim where d_year = 1998 and d_moy = 12) + group by c_customer_sk + ) + , segments as + (select cast((revenue/50) as int) as segment + from my_revenue + ) + select segment, count(*) as num_customers, segment*50 as segment_base + from segments + group by segment + order by segment, num_customers + limit 100; +-- end query 54 in stream 0 using template query54.tpl diff --git a/tests/tpcdsbench/src/queries/query55.sql b/tests/tpcdsbench/src/queries/query55.sql new file mode 100644 index 000000000..7c4ce9463 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query55.sql @@ -0,0 +1,13 @@ +-- start query 55 in stream 0 using template query55.tpl and seed QUALIFICATION + select i_brand_id brand_id, i_brand brand, + sum(ss_ext_sales_price) ext_price + from date_dim, store_sales, item + where d_date_sk = ss_sold_date_sk + and ss_item_sk = i_item_sk + and i_manager_id=28 + and 
d_moy=11 + and d_year=1999 + group by i_brand, i_brand_id + order by ext_price desc, i_brand_id + limit 100 ; +-- end query 55 in stream 0 using template query55.tpl diff --git a/tests/tpcdsbench/src/queries/query56.sql b/tests/tpcdsbench/src/queries/query56.sql new file mode 100644 index 000000000..ccf9c5a03 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query56.sql @@ -0,0 +1,67 @@ +-- start query 56 in stream 0 using template query56.tpl and seed QUALIFICATION + with ss as ( + select i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where i_item_id in (select + i_item_id +from item +where i_color in ('slate','blanched','burnished')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 2 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('slate','blanched','burnished')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 2 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from item +where i_color in ('slate','blanched','burnished')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 2001 + and d_moy = 2 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select i_item_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by total_sales + limit 100; +-- end query 56 in stream 0 using template query56.tpl diff --git a/tests/tpcdsbench/src/queries/query57.sql b/tests/tpcdsbench/src/queries/query57.sql new file mode 100644 index 000000000..4b8900b97 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query57.sql @@ -0,0 +1,47 @@ +-- start query 57 in stream 0 using template query57.tpl and seed QUALIFICATION + with v1 as( + select i_category, i_brand, + cc_name, + d_year, d_moy, + sum(cs_sales_price) sum_sales, + avg(sum(cs_sales_price)) over + (partition by i_category, i_brand, + cc_name, d_year) + avg_monthly_sales, + rank() over + (partition by i_category, i_brand, + cc_name + order by d_year, d_moy) rn + from item, catalog_sales, date_dim, call_center + where cs_item_sk = i_item_sk and + cs_sold_date_sk = d_date_sk and + cc_call_center_sk= cs_call_center_sk and + ( + d_year = 1999 or + ( d_year = 1999-1 and d_moy =12) or + ( d_year = 1999+1 and d_moy =1) + ) + group by i_category, i_brand, + cc_name , d_year, d_moy), + v2 as( + select v1.i_category, v1.i_brand, v1.cc_name + ,v1.d_year, v1.d_moy + ,v1.avg_monthly_sales + ,v1.sum_sales, v1_lag.sum_sales psum, v1_lead.sum_sales nsum + from v1, v1 v1_lag, v1 v1_lead + where v1.i_category = v1_lag.i_category and + v1.i_category = v1_lead.i_category and + v1.i_brand = v1_lag.i_brand and + v1.i_brand = v1_lead.i_brand and + v1. cc_name = v1_lag. cc_name and + v1. cc_name = v1_lead. 
cc_name and + v1.rn = v1_lag.rn + 1 and + v1.rn = v1_lead.rn - 1) + select * + from v2 + where d_year = 1999 and + avg_monthly_sales > 0 and + case when avg_monthly_sales > 0 then abs(sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 + order by sum_sales - avg_monthly_sales, 3 + limit 100; +-- end query 57 in stream 0 using template query57.tpl diff --git a/tests/tpcdsbench/src/queries/query58.sql b/tests/tpcdsbench/src/queries/query58.sql new file mode 100644 index 000000000..cd31224be --- /dev/null +++ b/tests/tpcdsbench/src/queries/query58.sql @@ -0,0 +1,64 @@ +-- start query 58 in stream 0 using template query58.tpl and seed QUALIFICATION + with ss_items as + (select i_item_id item_id + ,sum(ss_ext_sales_price) ss_item_rev + from store_sales + ,item + ,date_dim + where ss_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-01-03')) + and ss_sold_date_sk = d_date_sk + group by i_item_id), + cs_items as + (select i_item_id item_id + ,sum(cs_ext_sales_price) cs_item_rev + from catalog_sales + ,item + ,date_dim + where cs_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq = (select d_week_seq + from date_dim + where d_date = '2000-01-03')) + and cs_sold_date_sk = d_date_sk + group by i_item_id), + ws_items as + (select i_item_id item_id + ,sum(ws_ext_sales_price) ws_item_rev + from web_sales + ,item + ,date_dim + where ws_item_sk = i_item_sk + and d_date in (select d_date + from date_dim + where d_week_seq =(select d_week_seq + from date_dim + where d_date = '2000-01-03')) + and ws_sold_date_sk = d_date_sk + group by i_item_id) + select ss_items.item_id + ,ss_item_rev + ,ss_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ss_dev + ,cs_item_rev + ,cs_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 cs_dev + ,ws_item_rev + ,ws_item_rev/((ss_item_rev+cs_item_rev+ws_item_rev)/3) * 100 ws_dev + ,(ss_item_rev+cs_item_rev+ws_item_rev)/3 average + from ss_items,cs_items,ws_items + where ss_items.item_id=cs_items.item_id + and ss_items.item_id=ws_items.item_id + and ss_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + and ss_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and cs_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and cs_item_rev between 0.9 * ws_item_rev and 1.1 * ws_item_rev + and ws_item_rev between 0.9 * ss_item_rev and 1.1 * ss_item_rev + and ws_item_rev between 0.9 * cs_item_rev and 1.1 * cs_item_rev + order by item_id + ,ss_item_rev + limit 100; +-- end query 58 in stream 0 using template query58.tpl diff --git a/tests/tpcdsbench/src/queries/query59.sql b/tests/tpcdsbench/src/queries/query59.sql new file mode 100644 index 000000000..753b116ca --- /dev/null +++ b/tests/tpcdsbench/src/queries/query59.sql @@ -0,0 +1,43 @@ +-- start query 59 in stream 0 using template query59.tpl and seed QUALIFICATION + with wss as + (select d_week_seq, + ss_store_sk, + sum(case when (d_day_name='Sunday') then ss_sales_price else null end) sun_sales, + sum(case when (d_day_name='Monday') then ss_sales_price else null end) mon_sales, + sum(case when (d_day_name='Tuesday') then ss_sales_price else null end) tue_sales, + sum(case when (d_day_name='Wednesday') then ss_sales_price else null end) wed_sales, + sum(case when (d_day_name='Thursday') then ss_sales_price else null end) thu_sales, + sum(case when (d_day_name='Friday') then ss_sales_price else null end) fri_sales, + sum(case when 
(d_day_name='Saturday') then ss_sales_price else null end) sat_sales + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + group by d_week_seq,ss_store_sk + ) + select s_store_name1,s_store_id1,d_week_seq1 + ,sun_sales1/sun_sales2,mon_sales1/mon_sales2 + ,tue_sales1/tue_sales2,wed_sales1/wed_sales2,thu_sales1/thu_sales2 + ,fri_sales1/fri_sales2,sat_sales1/sat_sales2 + from + (select s_store_name s_store_name1,wss.d_week_seq d_week_seq1 + ,s_store_id s_store_id1,sun_sales sun_sales1 + ,mon_sales mon_sales1,tue_sales tue_sales1 + ,wed_sales wed_sales1,thu_sales thu_sales1 + ,fri_sales fri_sales1,sat_sales sat_sales1 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1212 and 1212 + 11) y, + (select s_store_name s_store_name2,wss.d_week_seq d_week_seq2 + ,s_store_id s_store_id2,sun_sales sun_sales2 + ,mon_sales mon_sales2,tue_sales tue_sales2 + ,wed_sales wed_sales2,thu_sales thu_sales2 + ,fri_sales fri_sales2,sat_sales sat_sales2 + from wss,store,date_dim d + where d.d_week_seq = wss.d_week_seq and + ss_store_sk = s_store_sk and + d_month_seq between 1212+ 12 and 1212 + 23) x + where s_store_id1=s_store_id2 + and d_week_seq1=d_week_seq2-52 + order by s_store_name1,s_store_id1,d_week_seq1 + limit 100; +-- end query 59 in stream 0 using template query59.tpl diff --git a/tests/tpcdsbench/src/queries/query60.sql b/tests/tpcdsbench/src/queries/query60.sql new file mode 100644 index 000000000..476bcdb91 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query60.sql @@ -0,0 +1,77 @@ +-- start query 60 in stream 0 using template query60.tpl and seed QUALIFICATION + with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Music')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Music')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Music')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100; +-- end query 60 in stream 0 using template query60.tpl diff --git a/tests/tpcdsbench/src/queries/query61.sql b/tests/tpcdsbench/src/queries/query61.sql new file mode 100644 index 000000000..d4d1415e1 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query61.sql @@ -0,0 +1,43 @@ +-- start query 61 in stream 0 using template query61.tpl and seed QUALIFICATION + select promotions,total,cast(promotions as decimal(15,4))/cast(total as decimal(15,4))*100 +from + (select 
sum(ss_ext_sales_price) promotions + from store_sales + ,store + ,promotion + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_promo_sk = p_promo_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -5 + and i_category = 'Jewelry' + and (p_channel_dmail = 'Y' or p_channel_email = 'Y' or p_channel_tv = 'Y') + and s_gmt_offset = -5 + and d_year = 1998 + and d_moy = 11) promotional_sales, + (select sum(ss_ext_sales_price) total + from store_sales + ,store + ,date_dim + ,customer + ,customer_address + ,item + where ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and ss_customer_sk= c_customer_sk + and ca_address_sk = c_current_addr_sk + and ss_item_sk = i_item_sk + and ca_gmt_offset = -5 + and i_category = 'Jewelry' + and s_gmt_offset = -5 + and d_year = 1998 + and d_moy = 11) all_sales +order by promotions, total + limit 100; +-- end query 61 in stream 0 using template query61.tpl diff --git a/tests/tpcdsbench/src/queries/query62.sql b/tests/tpcdsbench/src/queries/query62.sql new file mode 100644 index 000000000..35c91cbbe --- /dev/null +++ b/tests/tpcdsbench/src/queries/query62.sql @@ -0,0 +1,34 @@ +-- start query 62 in stream 0 using template query62.tpl and seed QUALIFICATION + select + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk <= 30 ) then 1 else 0 end) as 30_days + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 30) and + (ws_ship_date_sk - ws_sold_date_sk <= 60) then 1 else 0 end ) as 31_60_days + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 60) and + (ws_ship_date_sk - ws_sold_date_sk <= 90) then 1 else 0 end) as 61_90_days + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 90) and + (ws_ship_date_sk - ws_sold_date_sk <= 120) then 1 else 0 end) as 91_120_days + ,sum(case when (ws_ship_date_sk - ws_sold_date_sk > 120) then 1 else 0 end) as above120_days +from + web_sales + ,warehouse + ,ship_mode + ,web_site + ,date_dim +where + d_month_seq between 1200 and 1200 + 11 +and ws_ship_date_sk = d_date_sk +and ws_warehouse_sk = w_warehouse_sk +and ws_ship_mode_sk = sm_ship_mode_sk +and ws_web_site_sk = web_site_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,web_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,web_name + limit 100; +-- end query 62 in stream 0 using template query62.tpl diff --git a/tests/tpcdsbench/src/queries/query63.sql b/tests/tpcdsbench/src/queries/query63.sql new file mode 100644 index 000000000..b5c486741 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query63.sql @@ -0,0 +1,28 @@ +-- start query 63 in stream 0 using template query63.tpl and seed QUALIFICATION + select * +from (select i_manager_id + ,sum(ss_sales_price) sum_sales + ,avg(sum(ss_sales_price)) over (partition by i_manager_id) avg_monthly_sales + from item + ,store_sales + ,date_dim + ,store + where ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and ss_store_sk = s_store_sk + and d_month_seq in (1200,1200+1,1200+2,1200+3,1200+4,1200+5,1200+6,1200+7,1200+8,1200+9,1200+10,1200+11) + and (( i_category in ('Books','Children','Electronics') + and i_class in ('personal','portable','reference','self-help') + and i_brand in ('scholaramalgamalg #14','scholaramalgamalg #7', + 'exportiunivamalg #9','scholaramalgamalg #9')) + or( i_category in ('Women','Music','Men') + and i_class in ('accessories','classical','fragrances','pants') + and i_brand 
in ('amalgimporto #1','edu packscholar #1','exportiimporto #1', + 'importoamalg #1'))) +group by i_manager_id, d_moy) tmp1 +where case when avg_monthly_sales > 0 then abs (sum_sales - avg_monthly_sales) / avg_monthly_sales else null end > 0.1 +order by i_manager_id + ,avg_monthly_sales + ,sum_sales + limit 100; +-- end query 63 in stream 0 using template query63.tpl diff --git a/tests/tpcdsbench/src/queries/query64.sql b/tests/tpcdsbench/src/queries/query64.sql new file mode 100644 index 000000000..fb2680043 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query64.sql @@ -0,0 +1,118 @@ +-- start query 64 in stream 0 using template query64.tpl and seed QUALIFICATION +with cs_ui as + (select cs_item_sk + ,sum(cs_ext_list_price) as sale,sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit) as refund + from catalog_sales + ,catalog_returns + where cs_item_sk = cr_item_sk + and cs_order_number = cr_order_number + group by cs_item_sk + having sum(cs_ext_list_price)>2*sum(cr_refunded_cash+cr_reversed_charge+cr_store_credit)), +cross_sales as + (select i_product_name product_name + ,i_item_sk item_sk + ,s_store_name store_name + ,s_zip store_zip + ,ad1.ca_street_number b_street_number + ,ad1.ca_street_name b_street_name + ,ad1.ca_city b_city + ,ad1.ca_zip b_zip + ,ad2.ca_street_number c_street_number + ,ad2.ca_street_name c_street_name + ,ad2.ca_city c_city + ,ad2.ca_zip c_zip + ,d1.d_year as syear + ,d2.d_year as fsyear + ,d3.d_year s2year + ,count(*) cnt + ,sum(ss_wholesale_cost) s1 + ,sum(ss_list_price) s2 + ,sum(ss_coupon_amt) s3 + FROM store_sales + ,store_returns + ,cs_ui + ,date_dim d1 + ,date_dim d2 + ,date_dim d3 + ,store + ,customer + ,customer_demographics cd1 + ,customer_demographics cd2 + ,promotion + ,household_demographics hd1 + ,household_demographics hd2 + ,customer_address ad1 + ,customer_address ad2 + ,income_band ib1 + ,income_band ib2 + ,item + WHERE ss_store_sk = s_store_sk AND + ss_sold_date_sk = d1.d_date_sk AND + ss_customer_sk = c_customer_sk AND + ss_cdemo_sk= cd1.cd_demo_sk AND + ss_hdemo_sk = hd1.hd_demo_sk AND + ss_addr_sk = ad1.ca_address_sk and + ss_item_sk = i_item_sk and + ss_item_sk = sr_item_sk and + ss_ticket_number = sr_ticket_number and + ss_item_sk = cs_ui.cs_item_sk and + c_current_cdemo_sk = cd2.cd_demo_sk AND + c_current_hdemo_sk = hd2.hd_demo_sk AND + c_current_addr_sk = ad2.ca_address_sk and + c_first_sales_date_sk = d2.d_date_sk and + c_first_shipto_date_sk = d3.d_date_sk and + ss_promo_sk = p_promo_sk and + hd1.hd_income_band_sk = ib1.ib_income_band_sk and + hd2.hd_income_band_sk = ib2.ib_income_band_sk and + cd1.cd_marital_status <> cd2.cd_marital_status and + i_color in ('purple','burlywood','indian','spring','floral','medium') and + i_current_price between 64 and 64 + 10 and + i_current_price between 64 + 1 and 64 + 15 +group by i_product_name + ,i_item_sk + ,s_store_name + ,s_zip + ,ad1.ca_street_number + ,ad1.ca_street_name + ,ad1.ca_city + ,ad1.ca_zip + ,ad2.ca_street_number + ,ad2.ca_street_name + ,ad2.ca_city + ,ad2.ca_zip + ,d1.d_year + ,d2.d_year + ,d3.d_year +) +select cs1.product_name + ,cs1.store_name + ,cs1.store_zip + ,cs1.b_street_number + ,cs1.b_street_name + ,cs1.b_city + ,cs1.b_zip + ,cs1.c_street_number + ,cs1.c_street_name + ,cs1.c_city + ,cs1.c_zip + ,cs1.syear + ,cs1.cnt + ,cs1.s1 as s11 + ,cs1.s2 as s21 + ,cs1.s3 as s31 + ,cs2.s1 as s12 + ,cs2.s2 as s22 + ,cs2.s3 as s32 + ,cs2.syear as syear_2 + ,cs2.cnt as cnt_2 +from cross_sales cs1,cross_sales cs2 +where cs1.item_sk=cs2.item_sk and + cs1.syear = 1999 and + cs2.syear = 
1999 + 1 and + cs2.cnt <= cs1.cnt and + cs1.store_name = cs2.store_name and + cs1.store_zip = cs2.store_zip +order by cs1.product_name + ,cs1.store_name + ,cnt_2; +-- end query 64 in stream 0 using template query64.tpl diff --git a/tests/tpcdsbench/src/queries/query65.sql b/tests/tpcdsbench/src/queries/query65.sql new file mode 100644 index 000000000..9de91f156 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query65.sql @@ -0,0 +1,28 @@ +-- start query 65 in stream 0 using template query65.tpl and seed QUALIFICATION + select + s_store_name, + i_item_desc, + sc.revenue, + i_current_price, + i_wholesale_cost, + i_brand + from store, item, + (select ss_store_sk, avg(revenue) as ave + from + (select ss_store_sk, ss_item_sk, + sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1176 and 1176+11 + group by ss_store_sk, ss_item_sk) sa + group by ss_store_sk) sb, + (select ss_store_sk, ss_item_sk, sum(ss_sales_price) as revenue + from store_sales, date_dim + where ss_sold_date_sk = d_date_sk and d_month_seq between 1176 and 1176+11 + group by ss_store_sk, ss_item_sk) sc + where sb.ss_store_sk = sc.ss_store_sk and + sc.revenue <= 0.1 * sb.ave and + s_store_sk = sc.ss_store_sk and + i_item_sk = sc.ss_item_sk + order by s_store_name, i_item_desc + limit 100; +-- end query 65 in stream 0 using template query65.tpl diff --git a/tests/tpcdsbench/src/queries/query66.sql b/tests/tpcdsbench/src/queries/query66.sql new file mode 100644 index 000000000..6046c5121 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query66.sql @@ -0,0 +1,219 @@ +-- start query 66 in stream 0 using template query66.tpl and seed QUALIFICATION + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + ,sum(jan_sales) as jan_sales + ,sum(feb_sales) as feb_sales + ,sum(mar_sales) as mar_sales + ,sum(apr_sales) as apr_sales + ,sum(may_sales) as may_sales + ,sum(jun_sales) as jun_sales + ,sum(jul_sales) as jul_sales + ,sum(aug_sales) as aug_sales + ,sum(sep_sales) as sep_sales + ,sum(oct_sales) as oct_sales + ,sum(nov_sales) as nov_sales + ,sum(dec_sales) as dec_sales + ,sum(jan_sales/w_warehouse_sq_ft) as jan_sales_per_sq_foot + ,sum(feb_sales/w_warehouse_sq_ft) as feb_sales_per_sq_foot + ,sum(mar_sales/w_warehouse_sq_ft) as mar_sales_per_sq_foot + ,sum(apr_sales/w_warehouse_sq_ft) as apr_sales_per_sq_foot + ,sum(may_sales/w_warehouse_sq_ft) as may_sales_per_sq_foot + ,sum(jun_sales/w_warehouse_sq_ft) as jun_sales_per_sq_foot + ,sum(jul_sales/w_warehouse_sq_ft) as jul_sales_per_sq_foot + ,sum(aug_sales/w_warehouse_sq_ft) as aug_sales_per_sq_foot + ,sum(sep_sales/w_warehouse_sq_ft) as sep_sales_per_sq_foot + ,sum(oct_sales/w_warehouse_sq_ft) as oct_sales_per_sq_foot + ,sum(nov_sales/w_warehouse_sq_ft) as nov_sales_per_sq_foot + ,sum(dec_sales/w_warehouse_sq_ft) as dec_sales_per_sq_foot + ,sum(jan_net) as jan_net + ,sum(feb_net) as feb_net + ,sum(mar_net) as mar_net + ,sum(apr_net) as apr_net + ,sum(may_net) as may_net + ,sum(jun_net) as jun_net + ,sum(jul_net) as jul_net + ,sum(aug_net) as aug_net + ,sum(sep_net) as sep_net + ,sum(oct_net) as oct_net + ,sum(nov_net) as nov_net + ,sum(dec_net) as dec_net + from ( + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,concat('DHL' , ',' , 'BARIAN') as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then ws_ext_sales_price* ws_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then 
ws_ext_sales_price* ws_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then ws_ext_sales_price* ws_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then ws_ext_sales_price* ws_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then ws_ext_sales_price* ws_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then ws_ext_sales_price* ws_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then ws_ext_sales_price* ws_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then ws_ext_sales_price* ws_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then ws_ext_sales_price* ws_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then ws_ext_sales_price* ws_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then ws_ext_sales_price* ws_quantity else 0 end) as nov_sales + ,sum(case when d_moy = 12 + then ws_ext_sales_price* ws_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then ws_net_paid * ws_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then ws_net_paid * ws_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then ws_net_paid * ws_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then ws_net_paid * ws_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then ws_net_paid * ws_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then ws_net_paid * ws_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then ws_net_paid * ws_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then ws_net_paid * ws_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then ws_net_paid * ws_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then ws_net_paid * ws_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then ws_net_paid * ws_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then ws_net_paid * ws_quantity else 0 end) as dec_net + from + web_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + ws_warehouse_sk = w_warehouse_sk + and ws_sold_date_sk = d_date_sk + and ws_sold_time_sk = t_time_sk + and ws_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 30838 and 30838+28800 + and sm_carrier in ('DHL','BARIAN') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + union all + select + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,concat('DHL' , ',' , 'BARIAN') as ship_carriers + ,d_year as year + ,sum(case when d_moy = 1 + then cs_sales_price* cs_quantity else 0 end) as jan_sales + ,sum(case when d_moy = 2 + then cs_sales_price* cs_quantity else 0 end) as feb_sales + ,sum(case when d_moy = 3 + then cs_sales_price* cs_quantity else 0 end) as mar_sales + ,sum(case when d_moy = 4 + then cs_sales_price* cs_quantity else 0 end) as apr_sales + ,sum(case when d_moy = 5 + then cs_sales_price* cs_quantity else 0 end) as may_sales + ,sum(case when d_moy = 6 + then cs_sales_price* cs_quantity else 0 end) as jun_sales + ,sum(case when d_moy = 7 + then cs_sales_price* cs_quantity else 0 end) as jul_sales + ,sum(case when d_moy = 8 + then cs_sales_price* cs_quantity else 0 end) as aug_sales + ,sum(case when d_moy = 9 + then cs_sales_price* cs_quantity else 0 end) as sep_sales + ,sum(case when d_moy = 10 + then cs_sales_price* cs_quantity else 0 end) as oct_sales + ,sum(case when d_moy = 11 + then cs_sales_price* cs_quantity else 0 end) as nov_sales + ,sum(case when d_moy 
= 12 + then cs_sales_price* cs_quantity else 0 end) as dec_sales + ,sum(case when d_moy = 1 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as jan_net + ,sum(case when d_moy = 2 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as feb_net + ,sum(case when d_moy = 3 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as mar_net + ,sum(case when d_moy = 4 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as apr_net + ,sum(case when d_moy = 5 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as may_net + ,sum(case when d_moy = 6 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as jun_net + ,sum(case when d_moy = 7 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as jul_net + ,sum(case when d_moy = 8 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as aug_net + ,sum(case when d_moy = 9 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as sep_net + ,sum(case when d_moy = 10 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as oct_net + ,sum(case when d_moy = 11 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as nov_net + ,sum(case when d_moy = 12 + then cs_net_paid_inc_tax * cs_quantity else 0 end) as dec_net + from + catalog_sales + ,warehouse + ,date_dim + ,time_dim + ,ship_mode + where + cs_warehouse_sk = w_warehouse_sk + and cs_sold_date_sk = d_date_sk + and cs_sold_time_sk = t_time_sk + and cs_ship_mode_sk = sm_ship_mode_sk + and d_year = 2001 + and t_time between 30838 AND 30838+28800 + and sm_carrier in ('DHL','BARIAN') + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,d_year + ) x + group by + w_warehouse_name + ,w_warehouse_sq_ft + ,w_city + ,w_county + ,w_state + ,w_country + ,ship_carriers + ,year + order by w_warehouse_name + limit 100; +-- end query 66 in stream 0 using template query66.tpl diff --git a/tests/tpcdsbench/src/queries/query67.sql b/tests/tpcdsbench/src/queries/query67.sql new file mode 100644 index 000000000..34a08325f --- /dev/null +++ b/tests/tpcdsbench/src/queries/query67.sql @@ -0,0 +1,43 @@ +-- start query 67 in stream 0 using template query67.tpl and seed QUALIFICATION + select * +from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rank() over (partition by i_category order by sumsales desc) rk + from (select i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sum(coalesce(ss_sales_price*ss_quantity,0)) sumsales + from store_sales + ,date_dim + ,store + ,item + where ss_sold_date_sk=d_date_sk + and ss_item_sk=i_item_sk + and ss_store_sk = s_store_sk + and d_month_seq between 1200 and 1200+11 + group by rollup(i_category, i_class, i_brand, i_product_name, d_year, d_qoy, d_moy,s_store_id))dw1) dw2 +where rk <= 100 +order by i_category + ,i_class + ,i_brand + ,i_product_name + ,d_year + ,d_qoy + ,d_moy + ,s_store_id + ,sumsales + ,rk + limit 100; +-- end query 67 in stream 0 using template query67.tpl diff --git a/tests/tpcdsbench/src/queries/query68.sql b/tests/tpcdsbench/src/queries/query68.sql new file mode 100644 index 000000000..31e1b8cd6 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query68.sql @@ -0,0 +1,41 @@ +-- start query 68 in stream 0 using template query68.tpl and seed QUALIFICATION + select c_last_name + ,c_first_name + ,ca_city + ,bought_city + ,ss_ticket_number + ,extended_price + ,extended_tax + ,list_price + from (select ss_ticket_number + ,ss_customer_sk + ,ca_city bought_city + ,sum(ss_ext_sales_price) extended_price + ,sum(ss_ext_list_price) 
list_price + ,sum(ss_ext_tax) extended_tax + from store_sales + ,date_dim + ,store + ,household_demographics + ,customer_address + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and store_sales.ss_addr_sk = customer_address.ca_address_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_dep_count = 4 or + household_demographics.hd_vehicle_count= 3) + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_city in ('Fairview','Midway') + group by ss_ticket_number + ,ss_customer_sk + ,ss_addr_sk,ca_city) dn + ,customer + ,customer_address current_addr + where ss_customer_sk = c_customer_sk + and customer.c_current_addr_sk = current_addr.ca_address_sk + and current_addr.ca_city <> bought_city + order by c_last_name + ,ss_ticket_number + limit 100; +-- end query 68 in stream 0 using template query68.tpl diff --git a/tests/tpcdsbench/src/queries/query69.sql b/tests/tpcdsbench/src/queries/query69.sql new file mode 100644 index 000000000..b7dfa0194 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query69.sql @@ -0,0 +1,46 @@ +-- start query 69 in stream 0 using template query69.tpl and seed QUALIFICATION + select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3 + from + customer c,customer_address ca,customer_demographics + where + c.c_current_addr_sk = ca.ca_address_sk and + ca_state in ('KY','GA','NM') and + cd_demo_sk = c.c_current_cdemo_sk and + exists (select * + from store_sales,date_dim + where c.c_customer_sk = ss_customer_sk and + ss_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 4 and 4+2) and + (not exists (select * + from web_sales,date_dim + where c.c_customer_sk = ws_bill_customer_sk and + ws_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 4 and 4+2) and + not exists (select * + from catalog_sales,date_dim + where c.c_customer_sk = cs_ship_customer_sk and + cs_sold_date_sk = d_date_sk and + d_year = 2001 and + d_moy between 4 and 4+2)) + group by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + order by cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating + limit 100; +-- end query 69 in stream 0 using template query69.tpl diff --git a/tests/tpcdsbench/src/queries/query70.sql b/tests/tpcdsbench/src/queries/query70.sql new file mode 100644 index 000000000..5f4d036db --- /dev/null +++ b/tests/tpcdsbench/src/queries/query70.sql @@ -0,0 +1,37 @@ +-- start query 70 in stream 0 using template query70.tpl and seed QUALIFICATION + select + sum(ss_net_profit) as total_sum + ,s_state + ,s_county + ,grouping(s_state)+grouping(s_county) as lochierarchy + ,rank() over ( + partition by grouping(s_state)+grouping(s_county), + case when grouping(s_county) = 0 then s_state end + order by sum(ss_net_profit) desc) as rank_within_parent + from + store_sales + ,date_dim d1 + ,store + where + d1.d_month_seq between 1200 and 1200+11 + and d1.d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + and s_state in + ( select s_state + from (select s_state as s_state, + rank() over ( partition by s_state order by sum(ss_net_profit) desc) as ranking + from store_sales, store, date_dim + where d_month_seq between 1200 and 1200+11 + and d_date_sk = ss_sold_date_sk + and s_store_sk = ss_store_sk + group by s_state + ) tmp1 + where 
ranking <= 5 + ) + group by rollup(s_state,s_county) + order by + lochierarchy desc + ,case when lochierarchy = 0 then s_state end + ,rank_within_parent + limit 100; +-- end query 70 in stream 0 using template query70.tpl diff --git a/tests/tpcdsbench/src/queries/query71.sql b/tests/tpcdsbench/src/queries/query71.sql new file mode 100644 index 000000000..98ced9e59 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query71.sql @@ -0,0 +1,39 @@ +-- start query 71 in stream 0 using template query71.tpl and seed QUALIFICATION + select i_brand_id brand_id, i_brand brand,t_hour,t_minute, + sum(ext_price) ext_price + from item, (select ws_ext_sales_price as ext_price, + ws_sold_date_sk as sold_date_sk, + ws_item_sk as sold_item_sk, + ws_sold_time_sk as time_sk + from web_sales,date_dim + where d_date_sk = ws_sold_date_sk + and d_moy=11 + and d_year=1999 + union all + select cs_ext_sales_price as ext_price, + cs_sold_date_sk as sold_date_sk, + cs_item_sk as sold_item_sk, + cs_sold_time_sk as time_sk + from catalog_sales,date_dim + where d_date_sk = cs_sold_date_sk + and d_moy=11 + and d_year=1999 + union all + select ss_ext_sales_price as ext_price, + ss_sold_date_sk as sold_date_sk, + ss_item_sk as sold_item_sk, + ss_sold_time_sk as time_sk + from store_sales,date_dim + where d_date_sk = ss_sold_date_sk + and d_moy=11 + and d_year=1999 + ) tmp,time_dim + where + sold_item_sk = i_item_sk + and i_manager_id=1 + and time_sk = t_time_sk + and (t_meal_time = 'breakfast' or t_meal_time = 'dinner') + group by i_brand, i_brand_id,t_hour,t_minute + order by ext_price desc, i_brand_id + ; +-- end query 71 in stream 0 using template query71.tpl diff --git a/tests/tpcdsbench/src/queries/query72.sql b/tests/tpcdsbench/src/queries/query72.sql new file mode 100644 index 000000000..aa5185dec --- /dev/null +++ b/tests/tpcdsbench/src/queries/query72.sql @@ -0,0 +1,28 @@ +-- start query 72 in stream 0 using template query72.tpl and seed QUALIFICATION + select i_item_desc + ,w_warehouse_name + ,d1.d_week_seq + ,sum(case when p_promo_sk is null then 1 else 0 end) no_promo + ,sum(case when p_promo_sk is not null then 1 else 0 end) promo + ,count(*) total_cnt +from catalog_sales +join inventory on (cs_item_sk = inv_item_sk) +join warehouse on (w_warehouse_sk=inv_warehouse_sk) +join item on (i_item_sk = cs_item_sk) +join customer_demographics on (cs_bill_cdemo_sk = cd_demo_sk) +join household_demographics on (cs_bill_hdemo_sk = hd_demo_sk) +join date_dim d1 on (cs_sold_date_sk = d1.d_date_sk) +join date_dim d2 on (inv_date_sk = d2.d_date_sk) +join date_dim d3 on (cs_ship_date_sk = d3.d_date_sk) +left outer join promotion on (cs_promo_sk=p_promo_sk) +left outer join catalog_returns on (cr_item_sk = cs_item_sk and cr_order_number = cs_order_number) +where d1.d_week_seq = d2.d_week_seq + and inv_quantity_on_hand < cs_quantity + and d3.d_date > date_add(cast(d1.d_date as date),5) + and hd_buy_potential = '>10000' + and d1.d_year = 1999 + and cd_marital_status = 'D' +group by i_item_desc,w_warehouse_name,d1.d_week_seq +order by total_cnt desc, i_item_desc, w_warehouse_name, d_week_seq + limit 100; +-- end query 72 in stream 0 using template query72.tpl diff --git a/tests/tpcdsbench/src/queries/query73.sql b/tests/tpcdsbench/src/queries/query73.sql new file mode 100644 index 000000000..3ad91f3ea --- /dev/null +++ b/tests/tpcdsbench/src/queries/query73.sql @@ -0,0 +1,27 @@ +-- start query 73 in stream 0 using template query73.tpl and seed QUALIFICATION + select c_last_name + ,c_first_name + ,c_salutation + ,c_preferred_cust_flag 
+ ,ss_ticket_number + ,cnt from + (select ss_ticket_number + ,ss_customer_sk + ,count(*) cnt + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and date_dim.d_dom between 1 and 2 + and (household_demographics.hd_buy_potential = '>10000' or + household_demographics.hd_buy_potential = 'Unknown') + and household_demographics.hd_vehicle_count > 0 + and case when household_demographics.hd_vehicle_count > 0 then + household_demographics.hd_dep_count/ household_demographics.hd_vehicle_count else null end > 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_county in ('Williamson County','Franklin Parish','Bronx County','Orange County') + group by ss_ticket_number,ss_customer_sk) dj,customer + where ss_customer_sk = c_customer_sk + and cnt between 1 and 5 + order by cnt desc, c_last_name asc; +-- end query 73 in stream 0 using template query73.tpl diff --git a/tests/tpcdsbench/src/queries/query74.sql b/tests/tpcdsbench/src/queries/query74.sql new file mode 100644 index 000000000..8d4fcc1d5 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query74.sql @@ -0,0 +1,60 @@ +-- start query 74 in stream 0 using template query74.tpl and seed QUALIFICATION + with year_total as ( + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,sum(ss_net_paid) year_total + ,'s' sale_type + from customer + ,store_sales + ,date_dim + where c_customer_sk = ss_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + union all + select c_customer_id customer_id + ,c_first_name customer_first_name + ,c_last_name customer_last_name + ,d_year as year + ,sum(ws_net_paid) year_total + ,'w' sale_type + from customer + ,web_sales + ,date_dim + where c_customer_sk = ws_bill_customer_sk + and ws_sold_date_sk = d_date_sk + and d_year in (2001,2001+1) + group by c_customer_id + ,c_first_name + ,c_last_name + ,d_year + ) + select + t_s_secyear.customer_id, t_s_secyear.customer_first_name, t_s_secyear.customer_last_name + from year_total t_s_firstyear + ,year_total t_s_secyear + ,year_total t_w_firstyear + ,year_total t_w_secyear + where t_s_secyear.customer_id = t_s_firstyear.customer_id + and t_s_firstyear.customer_id = t_w_secyear.customer_id + and t_s_firstyear.customer_id = t_w_firstyear.customer_id + and t_s_firstyear.sale_type = 's' + and t_w_firstyear.sale_type = 'w' + and t_s_secyear.sale_type = 's' + and t_w_secyear.sale_type = 'w' + and t_s_firstyear.year = 2001 + and t_s_secyear.year = 2001+1 + and t_w_firstyear.year = 2001 + and t_w_secyear.year = 2001+1 + and t_s_firstyear.year_total > 0 + and t_w_firstyear.year_total > 0 + and case when t_w_firstyear.year_total > 0 then t_w_secyear.year_total / t_w_firstyear.year_total else null end + > case when t_s_firstyear.year_total > 0 then t_s_secyear.year_total / t_s_firstyear.year_total else null end + order by 1,1,1 + limit 100; +-- end query 74 in stream 0 using template query74.tpl diff --git a/tests/tpcdsbench/src/queries/query75.sql b/tests/tpcdsbench/src/queries/query75.sql new file mode 100644 index 000000000..28610ecf1 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query75.sql @@ -0,0 +1,69 @@ +-- start query 75 in stream 0 using template query75.tpl and seed QUALIFICATION +WITH all_sales AS ( + SELECT d_year + ,i_brand_id + 
,i_class_id + ,i_category_id + ,i_manufact_id + ,SUM(sales_cnt) AS sales_cnt + ,SUM(sales_amt) AS sales_amt + FROM (SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,cs_quantity - COALESCE(cr_return_quantity,0) AS sales_cnt + ,cs_ext_sales_price - COALESCE(cr_return_amount,0.0) AS sales_amt + FROM catalog_sales JOIN item ON i_item_sk=cs_item_sk + JOIN date_dim ON d_date_sk=cs_sold_date_sk + LEFT JOIN catalog_returns ON (cs_order_number=cr_order_number + AND cs_item_sk=cr_item_sk) + WHERE i_category='Books' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ss_quantity - COALESCE(sr_return_quantity,0) AS sales_cnt + ,ss_ext_sales_price - COALESCE(sr_return_amt,0.0) AS sales_amt + FROM store_sales JOIN item ON i_item_sk=ss_item_sk + JOIN date_dim ON d_date_sk=ss_sold_date_sk + LEFT JOIN store_returns ON (ss_ticket_number=sr_ticket_number + AND ss_item_sk=sr_item_sk) + WHERE i_category='Books' + UNION + SELECT d_year + ,i_brand_id + ,i_class_id + ,i_category_id + ,i_manufact_id + ,ws_quantity - COALESCE(wr_return_quantity,0) AS sales_cnt + ,ws_ext_sales_price - COALESCE(wr_return_amt,0.0) AS sales_amt + FROM web_sales JOIN item ON i_item_sk=ws_item_sk + JOIN date_dim ON d_date_sk=ws_sold_date_sk + LEFT JOIN web_returns ON (ws_order_number=wr_order_number + AND ws_item_sk=wr_item_sk) + WHERE i_category='Books') sales_detail + GROUP BY d_year, i_brand_id, i_class_id, i_category_id, i_manufact_id) + SELECT prev_yr.d_year AS prev_year + ,curr_yr.d_year AS year + ,curr_yr.i_brand_id + ,curr_yr.i_class_id + ,curr_yr.i_category_id + ,curr_yr.i_manufact_id + ,prev_yr.sales_cnt AS prev_yr_cnt + ,curr_yr.sales_cnt AS curr_yr_cnt + ,curr_yr.sales_cnt-prev_yr.sales_cnt AS sales_cnt_diff + ,curr_yr.sales_amt-prev_yr.sales_amt AS sales_amt_diff + FROM all_sales curr_yr, all_sales prev_yr + WHERE curr_yr.i_brand_id=prev_yr.i_brand_id + AND curr_yr.i_class_id=prev_yr.i_class_id + AND curr_yr.i_category_id=prev_yr.i_category_id + AND curr_yr.i_manufact_id=prev_yr.i_manufact_id + AND curr_yr.d_year=2002 + AND prev_yr.d_year=2002-1 + AND CAST(curr_yr.sales_cnt AS DECIMAL(17,2))/CAST(prev_yr.sales_cnt AS DECIMAL(17,2))<0.9 + ORDER BY sales_cnt_diff + limit 100; +-- end query 75 in stream 0 using template query75.tpl diff --git a/tests/tpcdsbench/src/queries/query76.sql b/tests/tpcdsbench/src/queries/query76.sql new file mode 100644 index 000000000..3ca940734 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query76.sql @@ -0,0 +1,23 @@ +-- start query 76 in stream 0 using template query76.tpl and seed QUALIFICATION + select channel, col_name, d_year, d_qoy, i_category, COUNT(*) sales_cnt, SUM(ext_sales_price) sales_amt FROM ( + SELECT 'store' as channel, 'ss_store_sk' col_name, d_year, d_qoy, i_category, ss_ext_sales_price ext_sales_price + FROM store_sales, item, date_dim + WHERE ss_store_sk IS NULL + AND ss_sold_date_sk=d_date_sk + AND ss_item_sk=i_item_sk + UNION ALL + SELECT 'web' as channel, 'ws_ship_customer_sk' col_name, d_year, d_qoy, i_category, ws_ext_sales_price ext_sales_price + FROM web_sales, item, date_dim + WHERE ws_ship_customer_sk IS NULL + AND ws_sold_date_sk=d_date_sk + AND ws_item_sk=i_item_sk + UNION ALL + SELECT 'catalog' as channel, 'cs_ship_addr_sk' col_name, d_year, d_qoy, i_category, cs_ext_sales_price ext_sales_price + FROM catalog_sales, item, date_dim + WHERE cs_ship_addr_sk IS NULL + AND cs_sold_date_sk=d_date_sk + AND cs_item_sk=i_item_sk) foo +GROUP BY channel, col_name, d_year, d_qoy, i_category +ORDER BY channel, 
col_name, d_year, d_qoy, i_category + limit 100; +-- end query 76 in stream 0 using template query76.tpl diff --git a/tests/tpcdsbench/src/queries/query77.sql b/tests/tpcdsbench/src/queries/query77.sql new file mode 100644 index 000000000..35fe6e0cf --- /dev/null +++ b/tests/tpcdsbench/src/queries/query77.sql @@ -0,0 +1,107 @@ +-- start query 77 in stream 0 using template query77.tpl and seed QUALIFICATION + with ss as + (select s_store_sk, + sum(ss_ext_sales_price) as sales, + sum(ss_net_profit) as profit + from store_sales, + date_dim, + store + where ss_sold_date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 30 ) + and ss_store_sk = s_store_sk + group by s_store_sk) + , + sr as + (select s_store_sk, + sum(sr_return_amt) as returns, + sum(sr_net_loss) as profit_loss + from store_returns, + date_dim, + store + where sr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 30 ) + and sr_store_sk = s_store_sk + group by s_store_sk), + cs as + (select cs_call_center_sk, + sum(cs_ext_sales_price) as sales, + sum(cs_net_profit) as profit + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 30 ) + group by cs_call_center_sk + ), + cr as + (select cr_call_center_sk, + sum(cr_return_amount) as returns, + sum(cr_net_loss) as profit_loss + from catalog_returns, + date_dim + where cr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 30 ) + group by cr_call_center_sk + ), + ws as + ( select wp_web_page_sk, + sum(ws_ext_sales_price) as sales, + sum(ws_net_profit) as profit + from web_sales, + date_dim, + web_page + where ws_sold_date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 30 ) + and ws_web_page_sk = wp_web_page_sk + group by wp_web_page_sk), + wr as + (select wp_web_page_sk, + sum(wr_return_amt) as returns, + sum(wr_net_loss) as profit_loss + from web_returns, + date_dim, + web_page + where wr_returned_date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 30 ) + and wr_web_page_sk = wp_web_page_sk + group by wp_web_page_sk) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , ss.s_store_sk as id + , sales + , coalesce(returns, 0) as returns + , (profit - coalesce(profit_loss,0)) as profit + from ss left join sr + on ss.s_store_sk = sr.s_store_sk + union all + select 'catalog channel' as channel + , cs_call_center_sk as id + , sales + , returns + , (profit - profit_loss) as profit + from cs + , cr + union all + select 'web channel' as channel + , ws.wp_web_page_sk as id + , sales + , coalesce(returns, 0) returns + , (profit - coalesce(profit_loss,0)) as profit + from ws left join wr + on ws.wp_web_page_sk = wr.wp_web_page_sk + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100; +-- end query 77 in stream 0 using template query77.tpl diff --git a/tests/tpcdsbench/src/queries/query78.sql b/tests/tpcdsbench/src/queries/query78.sql new file mode 100644 index 000000000..a4e0bf02a --- /dev/null +++ b/tests/tpcdsbench/src/queries/query78.sql @@ -0,0 +1,57 @@ +-- start query 78 in stream 0 using template query78.tpl and seed QUALIFICATION +with ws as + (select d_year AS 
ws_sold_year, ws_item_sk, + ws_bill_customer_sk ws_customer_sk, + sum(ws_quantity) ws_qty, + sum(ws_wholesale_cost) ws_wc, + sum(ws_sales_price) ws_sp + from web_sales + left join web_returns on wr_order_number=ws_order_number and ws_item_sk=wr_item_sk + join date_dim on ws_sold_date_sk = d_date_sk + where wr_order_number is null + group by d_year, ws_item_sk, ws_bill_customer_sk + ), +cs as + (select d_year AS cs_sold_year, cs_item_sk, + cs_bill_customer_sk cs_customer_sk, + sum(cs_quantity) cs_qty, + sum(cs_wholesale_cost) cs_wc, + sum(cs_sales_price) cs_sp + from catalog_sales + left join catalog_returns on cr_order_number=cs_order_number and cs_item_sk=cr_item_sk + join date_dim on cs_sold_date_sk = d_date_sk + where cr_order_number is null + group by d_year, cs_item_sk, cs_bill_customer_sk + ), +ss as + (select d_year AS ss_sold_year, ss_item_sk, + ss_customer_sk, + sum(ss_quantity) ss_qty, + sum(ss_wholesale_cost) ss_wc, + sum(ss_sales_price) ss_sp + from store_sales + left join store_returns on sr_ticket_number=ss_ticket_number and ss_item_sk=sr_item_sk + join date_dim on ss_sold_date_sk = d_date_sk + where sr_ticket_number is null + group by d_year, ss_item_sk, ss_customer_sk + ) + select +ss_sold_year, ss_item_sk, ss_customer_sk, +round(ss_qty/(coalesce(ws_qty,0)+coalesce(cs_qty,0)),2) ratio, +ss_qty store_qty, ss_wc store_wholesale_cost, ss_sp store_sales_price, +coalesce(ws_qty,0)+coalesce(cs_qty,0) other_chan_qty, +coalesce(ws_wc,0)+coalesce(cs_wc,0) other_chan_wholesale_cost, +coalesce(ws_sp,0)+coalesce(cs_sp,0) other_chan_sales_price +from ss +left join ws on (ws_sold_year=ss_sold_year and ws_item_sk=ss_item_sk and ws_customer_sk=ss_customer_sk) +left join cs on (cs_sold_year=ss_sold_year and cs_item_sk=ss_item_sk and cs_customer_sk=ss_customer_sk) +where (coalesce(ws_qty,0)>0 or coalesce(cs_qty, 0)>0) and ss_sold_year=2000 +order by + ss_sold_year, ss_item_sk, ss_customer_sk, + ss_qty desc, ss_wc desc, ss_sp desc, + other_chan_qty, + other_chan_wholesale_cost, + other_chan_sales_price, + round(ss_qty/(coalesce(ws_qty+cs_qty,1)),2) + limit 100; +-- end query 78 in stream 0 using template query78.tpl diff --git a/tests/tpcdsbench/src/queries/query79.sql b/tests/tpcdsbench/src/queries/query79.sql new file mode 100644 index 000000000..50cb2cb15 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query79.sql @@ -0,0 +1,22 @@ +-- start query 79 in stream 0 using template query79.tpl and seed QUALIFICATION + select + c_last_name,c_first_name,substr(s_city,1,30),ss_ticket_number,amt,profit + from + (select ss_ticket_number + ,ss_customer_sk + ,store.s_city + ,sum(ss_coupon_amt) amt + ,sum(ss_net_profit) profit + from store_sales,date_dim,store,household_demographics + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_store_sk = store.s_store_sk + and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk + and (household_demographics.hd_dep_count = 6 or household_demographics.hd_vehicle_count > 2) + and date_dim.d_dow = 1 + and date_dim.d_year in (1999,1999+1,1999+2) + and store.s_number_employees between 200 and 295 + group by ss_ticket_number,ss_customer_sk,ss_addr_sk,store.s_city) ms,customer + where ss_customer_sk = c_customer_sk + order by c_last_name,c_first_name,substr(s_city,1,30), profit + limit 100; +-- end query 79 in stream 0 using template query79.tpl diff --git a/tests/tpcdsbench/src/queries/query80.sql b/tests/tpcdsbench/src/queries/query80.sql new file mode 100644 index 000000000..3c10486f0 --- /dev/null +++ 
b/tests/tpcdsbench/src/queries/query80.sql @@ -0,0 +1,95 @@ +-- start query 80 in stream 0 using template query80.tpl and seed QUALIFICATION + with ssr as + (select s_store_id as store_id, + sum(ss_ext_sales_price) as sales, + sum(coalesce(sr_return_amt, 0)) as returns, + sum(ss_net_profit - coalesce(sr_net_loss, 0)) as profit + from store_sales left outer join store_returns on + (ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number), + date_dim, + store, + item, + promotion + where ss_sold_date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 30 ) + and ss_store_sk = s_store_sk + and ss_item_sk = i_item_sk + and i_current_price > 50 + and ss_promo_sk = p_promo_sk + and p_channel_tv = 'N' + group by s_store_id) + , + csr as + (select cp_catalog_page_id as catalog_page_id, + sum(cs_ext_sales_price) as sales, + sum(coalesce(cr_return_amount, 0)) as returns, + sum(cs_net_profit - coalesce(cr_net_loss, 0)) as profit + from catalog_sales left outer join catalog_returns on + (cs_item_sk = cr_item_sk and cs_order_number = cr_order_number), + date_dim, + catalog_page, + item, + promotion + where cs_sold_date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 30 ) + and cs_catalog_page_sk = cp_catalog_page_sk + and cs_item_sk = i_item_sk + and i_current_price > 50 + and cs_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by cp_catalog_page_id) + , + wsr as + (select web_site_id, + sum(ws_ext_sales_price) as sales, + sum(coalesce(wr_return_amt, 0)) as returns, + sum(ws_net_profit - coalesce(wr_net_loss, 0)) as profit + from web_sales left outer join web_returns on + (ws_item_sk = wr_item_sk and ws_order_number = wr_order_number), + date_dim, + web_site, + item, + promotion + where ws_sold_date_sk = d_date_sk + and d_date between cast('2000-08-23' as date) + and date_add(cast('2000-08-23' as date), 30 ) + and ws_web_site_sk = web_site_sk + and ws_item_sk = i_item_sk + and i_current_price > 50 + and ws_promo_sk = p_promo_sk + and p_channel_tv = 'N' +group by web_site_id) + select channel + , id + , sum(sales) as sales + , sum(returns) as returns + , sum(profit) as profit + from + (select 'store channel' as channel + , concat( 'store' , store_id ) as id + , sales + , returns + , profit + from ssr + union all + select 'catalog channel' as channel + , concat( 'catalog_page' , catalog_page_id ) as id + , sales + , returns + , profit + from csr + union all + select 'web channel' as channel + , concat( 'web_site' , web_site_id ) as id + , sales + , returns + , profit + from wsr + ) x + group by rollup (channel, id) + order by channel + ,id + limit 100; +-- end query 80 in stream 0 using template query80.tpl diff --git a/tests/tpcdsbench/src/queries/query81.sql b/tests/tpcdsbench/src/queries/query81.sql new file mode 100644 index 000000000..6f3e23e1a --- /dev/null +++ b/tests/tpcdsbench/src/queries/query81.sql @@ -0,0 +1,30 @@ +-- start query 81 in stream 0 using template query81.tpl and seed QUALIFICATION + with customer_total_return as + (select cr_returning_customer_sk as ctr_customer_sk + ,ca_state as ctr_state, + sum(cr_return_amt_inc_tax) as ctr_total_return + from catalog_returns + ,date_dim + ,customer_address + where cr_returned_date_sk = d_date_sk + and d_year =2000 + and cr_returning_addr_sk = ca_address_sk + group by cr_returning_customer_sk + ,ca_state ) + select c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + 
,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + from customer_total_return ctr1 + ,customer_address + ,customer + where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2 + from customer_total_return ctr2 + where ctr1.ctr_state = ctr2.ctr_state) + and ca_address_sk = c_current_addr_sk + and ca_state = 'GA' + and ctr1.ctr_customer_sk = c_customer_sk + order by c_customer_id,c_salutation,c_first_name,c_last_name,ca_street_number,ca_street_name + ,ca_street_type,ca_suite_number,ca_city,ca_county,ca_state,ca_zip,ca_country,ca_gmt_offset + ,ca_location_type,ctr_total_return + limit 100; +-- end query 81 in stream 0 using template query81.tpl diff --git a/tests/tpcdsbench/src/queries/query82.sql b/tests/tpcdsbench/src/queries/query82.sql new file mode 100644 index 000000000..f2fc69fb1 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query82.sql @@ -0,0 +1,16 @@ +-- start query 82 in stream 0 using template query82.tpl and seed QUALIFICATION + select i_item_id + ,i_item_desc + ,i_current_price + from item, inventory, date_dim, store_sales + where i_current_price between 62 and 62+30 + and inv_item_sk = i_item_sk + and d_date_sk=inv_date_sk + and d_date between cast('2000-05-25' as date) and date_add(cast('2000-05-25' as date), 60 ) + and i_manufact_id in (129,270,821,423) + and inv_quantity_on_hand between 100 and 500 + and ss_item_sk = i_item_sk + group by i_item_id,i_item_desc,i_current_price + order by i_item_id + limit 100; +-- end query 82 in stream 0 using template query82.tpl diff --git a/tests/tpcdsbench/src/queries/query83.sql b/tests/tpcdsbench/src/queries/query83.sql new file mode 100644 index 000000000..2cc2a9373 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query83.sql @@ -0,0 +1,66 @@ +-- start query 83 in stream 0 using template query83.tpl and seed QUALIFICATION + with sr_items as + (select i_item_id item_id, + sum(sr_return_quantity) sr_item_qty + from store_returns, + item, + date_dim + where sr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-06-30','2000-09-27','2000-11-17'))) + and sr_returned_date_sk = d_date_sk + group by i_item_id), + cr_items as + (select i_item_id item_id, + sum(cr_return_quantity) cr_item_qty + from catalog_returns, + item, + date_dim + where cr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-06-30','2000-09-27','2000-11-17'))) + and cr_returned_date_sk = d_date_sk + group by i_item_id), + wr_items as + (select i_item_id item_id, + sum(wr_return_quantity) wr_item_qty + from web_returns, + item, + date_dim + where wr_item_sk = i_item_sk + and d_date in + (select d_date + from date_dim + where d_week_seq in + (select d_week_seq + from date_dim + where d_date in ('2000-06-30','2000-09-27','2000-11-17'))) + and wr_returned_date_sk = d_date_sk + group by i_item_id) + select sr_items.item_id + ,sr_item_qty + ,sr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 sr_dev + ,cr_item_qty + ,cr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 cr_dev + ,wr_item_qty + ,wr_item_qty/(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 * 100 wr_dev + ,(sr_item_qty+cr_item_qty+wr_item_qty)/3.0 average + from sr_items + ,cr_items + ,wr_items + where sr_items.item_id=cr_items.item_id + and sr_items.item_id=wr_items.item_id + order by sr_items.item_id + ,sr_item_qty + limit 
100; +-- end query 83 in stream 0 using template query83.tpl diff --git a/tests/tpcdsbench/src/queries/query84.sql b/tests/tpcdsbench/src/queries/query84.sql new file mode 100644 index 000000000..e1a356918 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query84.sql @@ -0,0 +1,20 @@ +-- start query 84 in stream 0 using template query84.tpl and seed QUALIFICATION + select c_customer_id as customer_id + ,concat(c_last_name , ', ' , coalesce(c_first_name,'')) as customername + from customer + ,customer_address + ,customer_demographics + ,household_demographics + ,income_band + ,store_returns + where ca_city = 'Edgewood' + and c_current_addr_sk = ca_address_sk + and ib_lower_bound >= 38128 + and ib_upper_bound <= 38128 + 50000 + and ib_income_band_sk = hd_income_band_sk + and cd_demo_sk = c_current_cdemo_sk + and hd_demo_sk = c_current_hdemo_sk + and sr_cdemo_sk = cd_demo_sk + order by c_customer_id + limit 100; +-- end query 84 in stream 0 using template query84.tpl diff --git a/tests/tpcdsbench/src/queries/query85.sql b/tests/tpcdsbench/src/queries/query85.sql new file mode 100644 index 000000000..fba049d5d --- /dev/null +++ b/tests/tpcdsbench/src/queries/query85.sql @@ -0,0 +1,83 @@ +-- start query 85 in stream 0 using template query85.tpl and seed QUALIFICATION + select substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + from web_sales, web_returns, web_page, customer_demographics cd1, + customer_demographics cd2, customer_address, date_dim, reason + where ws_web_page_sk = wp_web_page_sk + and ws_item_sk = wr_item_sk + and ws_order_number = wr_order_number + and ws_sold_date_sk = d_date_sk and d_year = 2000 + and cd1.cd_demo_sk = wr_refunded_cdemo_sk + and cd2.cd_demo_sk = wr_returning_cdemo_sk + and ca_address_sk = wr_refunded_addr_sk + and r_reason_sk = wr_reason_sk + and + ( + ( + cd1.cd_marital_status = 'M' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'Advanced Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 100.00 and 150.00 + ) + or + ( + cd1.cd_marital_status = 'S' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = 'College' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 50.00 and 100.00 + ) + or + ( + cd1.cd_marital_status = 'W' + and + cd1.cd_marital_status = cd2.cd_marital_status + and + cd1.cd_education_status = '2 yr Degree' + and + cd1.cd_education_status = cd2.cd_education_status + and + ws_sales_price between 150.00 and 200.00 + ) + ) + and + ( + ( + ca_country = 'United States' + and + ca_state in ('IN', 'OH', 'NJ') + and ws_net_profit between 100 and 200 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('WI', 'CT', 'KY') + and ws_net_profit between 150 and 300 + ) + or + ( + ca_country = 'United States' + and + ca_state in ('LA', 'IA', 'AR') + and ws_net_profit between 50 and 250 + ) + ) +group by r_reason_desc +order by substr(r_reason_desc,1,20) + ,avg(ws_quantity) + ,avg(wr_refunded_cash) + ,avg(wr_fee) + limit 100; +-- end query 85 in stream 0 using template query85.tpl diff --git a/tests/tpcdsbench/src/queries/query86.sql b/tests/tpcdsbench/src/queries/query86.sql new file mode 100644 index 000000000..d48327c9d --- /dev/null +++ b/tests/tpcdsbench/src/queries/query86.sql @@ -0,0 +1,25 @@ +-- start query 86 in stream 0 using template query86.tpl and seed QUALIFICATION + select + sum(ws_net_paid) as total_sum + ,i_category + ,i_class + 
,grouping(i_category)+grouping(i_class) as lochierarchy + ,rank() over ( + partition by grouping(i_category)+grouping(i_class), + case when grouping(i_class) = 0 then i_category end + order by sum(ws_net_paid) desc) as rank_within_parent + from + web_sales + ,date_dim d1 + ,item + where + d1.d_month_seq between 1200 and 1200+11 + and d1.d_date_sk = ws_sold_date_sk + and i_item_sk = ws_item_sk + group by rollup(i_category,i_class) + order by + lochierarchy desc, + case when lochierarchy = 0 then i_category end, + rank_within_parent + limit 100; +-- end query 86 in stream 0 using template query86.tpl diff --git a/tests/tpcdsbench/src/queries/query87.sql b/tests/tpcdsbench/src/queries/query87.sql new file mode 100644 index 000000000..851e23337 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query87.sql @@ -0,0 +1,22 @@ +-- start query 87 in stream 0 using template query87.tpl and seed QUALIFICATION +select count(*) +from ((select distinct c_last_name, c_first_name, d_date + from store_sales, date_dim, customer + where store_sales.ss_sold_date_sk = date_dim.d_date_sk + and store_sales.ss_customer_sk = customer.c_customer_sk + and d_month_seq between 1200 and 1200+11) + except + (select distinct c_last_name, c_first_name, d_date + from catalog_sales, date_dim, customer + where catalog_sales.cs_sold_date_sk = date_dim.d_date_sk + and catalog_sales.cs_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1200 and 1200+11) + except + (select distinct c_last_name, c_first_name, d_date + from web_sales, date_dim, customer + where web_sales.ws_sold_date_sk = date_dim.d_date_sk + and web_sales.ws_bill_customer_sk = customer.c_customer_sk + and d_month_seq between 1200 and 1200+11) +) cool_cust +; +-- end query 87 in stream 0 using template query87.tpl diff --git a/tests/tpcdsbench/src/queries/query88.sql b/tests/tpcdsbench/src/queries/query88.sql new file mode 100644 index 000000000..c216bb8e4 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query88.sql @@ -0,0 +1,93 @@ +-- start query 88 in stream 0 using template query88.tpl and seed QUALIFICATION +select * +from + (select count(*) h8_30_to_9 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 8 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s1, + (select count(*) h9_to_9_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 9 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s2, + (select count(*) h9_30_to_10 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and 
time_dim.t_hour = 9 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s3, + (select count(*) h10_to_10_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s4, + (select count(*) h10_30_to_11 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 10 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s5, + (select count(*) h11_to_11_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s6, + (select count(*) h11_30_to_12 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 11 + and time_dim.t_minute >= 30 + and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s7, + (select count(*) h12_to_12_30 + from store_sales, household_demographics , time_dim, store + where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 12 + and time_dim.t_minute < 30 + and ((household_demographics.hd_dep_count = 4 and household_demographics.hd_vehicle_count<=4+2) or + (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2) or + (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2)) + and store.s_store_name = 'ese') s8 +; +-- end query 88 in stream 0 using template query88.tpl diff --git a/tests/tpcdsbench/src/queries/query89.sql 
b/tests/tpcdsbench/src/queries/query89.sql new file mode 100644 index 000000000..4fe558504 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query89.sql @@ -0,0 +1,27 @@ +-- start query 89 in stream 0 using template query89.tpl and seed QUALIFICATION + select * +from( +select i_category, i_class, i_brand, + s_store_name, s_company_name, + d_moy, + sum(ss_sales_price) sum_sales, + avg(sum(ss_sales_price)) over + (partition by i_category, i_brand, s_store_name, s_company_name) + avg_monthly_sales +from item, store_sales, date_dim, store +where ss_item_sk = i_item_sk and + ss_sold_date_sk = d_date_sk and + ss_store_sk = s_store_sk and + d_year in (1999) and + ((i_category in ('Books','Electronics','Sports') and + i_class in ('computers','stereo','football') + ) + or (i_category in ('Men','Jewelry','Women') and + i_class in ('shirts','birdal','dresses') + )) +group by i_category, i_class, i_brand, + s_store_name, s_company_name, d_moy) tmp1 +where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1 +order by sum_sales - avg_monthly_sales, s_store_name + limit 100; +-- end query 89 in stream 0 using template query89.tpl diff --git a/tests/tpcdsbench/src/queries/query90.sql b/tests/tpcdsbench/src/queries/query90.sql new file mode 100644 index 000000000..7f8e9671c --- /dev/null +++ b/tests/tpcdsbench/src/queries/query90.sql @@ -0,0 +1,21 @@ +-- start query 90 in stream 0 using template query90.tpl and seed QUALIFICATION + select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio + from ( select count(*) amc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 8 and 8+1 + and household_demographics.hd_dep_count = 6 + and web_page.wp_char_count between 5000 and 5200) at1, + ( select count(*) pmc + from web_sales, household_demographics , time_dim, web_page + where ws_sold_time_sk = time_dim.t_time_sk + and ws_ship_hdemo_sk = household_demographics.hd_demo_sk + and ws_web_page_sk = web_page.wp_web_page_sk + and time_dim.t_hour between 19 and 19+1 + and household_demographics.hd_dep_count = 6 + and web_page.wp_char_count between 5000 and 5200) pt + order by am_pm_ratio + limit 100; +-- end query 90 in stream 0 using template query90.tpl diff --git a/tests/tpcdsbench/src/queries/query91.sql b/tests/tpcdsbench/src/queries/query91.sql new file mode 100644 index 000000000..d455a9c2e --- /dev/null +++ b/tests/tpcdsbench/src/queries/query91.sql @@ -0,0 +1,30 @@ +-- start query 91 in stream 0 using template query91.tpl and seed QUALIFICATION +select + cc_call_center_id Call_Center, + cc_name Call_Center_Name, + cc_manager Manager, + sum(cr_net_loss) Returns_Loss +from + call_center, + catalog_returns, + date_dim, + customer, + customer_address, + customer_demographics, + household_demographics +where + cr_call_center_sk = cc_call_center_sk +and cr_returned_date_sk = d_date_sk +and cr_returning_customer_sk= c_customer_sk +and cd_demo_sk = c_current_cdemo_sk +and hd_demo_sk = c_current_hdemo_sk +and ca_address_sk = c_current_addr_sk +and d_year = 1998 +and d_moy = 11 +and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') + or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) +and hd_buy_potential like 'Unknown%' +and ca_gmt_offset = -7 +group by 
cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status +order by sum(cr_net_loss) desc; +-- end query 91 in stream 0 using template query91.tpl diff --git a/tests/tpcdsbench/src/queries/query92.sql b/tests/tpcdsbench/src/queries/query92.sql new file mode 100644 index 000000000..04a8d5557 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query92.sql @@ -0,0 +1,29 @@ +-- start query 92 in stream 0 using template query92.tpl and seed QUALIFICATION + select + sum(ws_ext_discount_amt) as Excess_Discount_Amount +from + web_sales + ,item + ,date_dim +where +i_manufact_id = 350 +and i_item_sk = ws_item_sk +and d_date between '2000-01-27' and + date_add(cast('2000-01-27' as date), 90 ) +and d_date_sk = ws_sold_date_sk +and ws_ext_discount_amt + > ( + SELECT + 1.3 * avg(ws_ext_discount_amt) + FROM + web_sales + ,date_dim + WHERE + ws_item_sk = i_item_sk + and d_date between '2000-01-27' and + date_add(cast('2000-01-27' as date), 90 ) + and d_date_sk = ws_sold_date_sk + ) +order by sum(ws_ext_discount_amt) + limit 100; +-- end query 92 in stream 0 using template query92.tpl diff --git a/tests/tpcdsbench/src/queries/query93.sql b/tests/tpcdsbench/src/queries/query93.sql new file mode 100644 index 000000000..0975e3f6f --- /dev/null +++ b/tests/tpcdsbench/src/queries/query93.sql @@ -0,0 +1,17 @@ +-- start query 93 in stream 0 using template query93.tpl and seed QUALIFICATION + select ss_customer_sk + ,sum(act_sales) sumsales + from (select ss_item_sk + ,ss_ticket_number + ,ss_customer_sk + ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price + else (ss_quantity*ss_sales_price) end act_sales + from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk + and sr_ticket_number = ss_ticket_number) + ,reason + where sr_reason_sk = r_reason_sk + and r_reason_desc = 'reason 28') t + group by ss_customer_sk + order by sumsales, ss_customer_sk + limit 100; +-- end query 93 in stream 0 using template query93.tpl diff --git a/tests/tpcdsbench/src/queries/query94.sql b/tests/tpcdsbench/src/queries/query94.sql new file mode 100644 index 000000000..25acef23b --- /dev/null +++ b/tests/tpcdsbench/src/queries/query94.sql @@ -0,0 +1,28 @@ +-- start query 94 in stream 0 using template query94.tpl and seed QUALIFICATION + select + count(distinct ws_order_number) as order_count + ,sum(ws_ext_ship_cost) as total_shipping_cost + ,sum(ws_net_profit) as total_net_profit +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between cast('1999-2-01' as date) and + date_add(cast('1999-2-01' as date), 60 ) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'IL' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and exists (select * + from web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) +and not exists(select * + from web_returns wr1 + where ws1.ws_order_number = wr1.wr_order_number) +order by count(distinct ws_order_number) + limit 100; +-- end query 94 in stream 0 using template query94.tpl diff --git a/tests/tpcdsbench/src/queries/query95.sql b/tests/tpcdsbench/src/queries/query95.sql new file mode 100644 index 000000000..bc45a968c --- /dev/null +++ b/tests/tpcdsbench/src/queries/query95.sql @@ -0,0 +1,31 @@ +-- start query 95 in stream 0 using template query95.tpl and seed QUALIFICATION +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales 
ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as order_count + ,sum(ws_ext_ship_cost) as total_shipping_cost + ,sum(ws_net_profit) as total_net_profit +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + d_date between '1999-2-01' and + date_add(cast('1999-2-01' as date), 60 ) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'IL' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) + limit 100; +-- end query 95 in stream 0 using template query95.tpl diff --git a/tests/tpcdsbench/src/queries/query96.sql b/tests/tpcdsbench/src/queries/query96.sql new file mode 100644 index 000000000..640f41a4f --- /dev/null +++ b/tests/tpcdsbench/src/queries/query96.sql @@ -0,0 +1,15 @@ +-- start query 96 in stream 0 using template query96.tpl and seed QUALIFICATION + select count(*) +from store_sales + ,household_demographics + ,time_dim, store +where ss_sold_time_sk = time_dim.t_time_sk + and ss_hdemo_sk = household_demographics.hd_demo_sk + and ss_store_sk = s_store_sk + and time_dim.t_hour = 20 + and time_dim.t_minute >= 30 + and household_demographics.hd_dep_count = 7 + and store.s_store_name = 'ese' +order by count(*) + limit 100; +-- end query 96 in stream 0 using template query96.tpl diff --git a/tests/tpcdsbench/src/queries/query97.sql b/tests/tpcdsbench/src/queries/query97.sql new file mode 100644 index 000000000..9e48b4a7e --- /dev/null +++ b/tests/tpcdsbench/src/queries/query97.sql @@ -0,0 +1,24 @@ +-- start query 97 in stream 0 using template query97.tpl and seed QUALIFICATION +with ssci as ( +select ss_customer_sk customer_sk + ,ss_item_sk item_sk +from store_sales,date_dim +where ss_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 +group by ss_customer_sk + ,ss_item_sk), +csci as( + select cs_bill_customer_sk customer_sk + ,cs_item_sk item_sk +from catalog_sales,date_dim +where cs_sold_date_sk = d_date_sk + and d_month_seq between 1200 and 1200 + 11 +group by cs_bill_customer_sk + ,cs_item_sk) + select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only + ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only + ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog +from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk + and ssci.item_sk = csci.item_sk) + limit 100; +-- end query 97 in stream 0 using template query97.tpl diff --git a/tests/tpcdsbench/src/queries/query98.sql b/tests/tpcdsbench/src/queries/query98.sql new file mode 100644 index 000000000..54f9384a4 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query98.sql @@ -0,0 +1,32 @@ +-- start query 98 in stream 0 using template query98.tpl and seed QUALIFICATION +select i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price + ,sum(ss_ext_sales_price) as itemrevenue + ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over + (partition by i_class) as revenueratio +from + store_sales + ,item + ,date_dim +where + ss_item_sk = i_item_sk + and i_category in ('Sports', 'Books', 'Home') + and ss_sold_date_sk 
= d_date_sk + and d_date between cast('1999-02-22' as date) + and date_add(cast('1999-02-22' as date), 30 ) +group by + i_item_id + ,i_item_desc + ,i_category + ,i_class + ,i_current_price +order by + i_category + ,i_class + ,i_item_id + ,i_item_desc + ,revenueratio; +-- end query 98 in stream 0 using template query98.tpl diff --git a/tests/tpcdsbench/src/queries/query99.sql b/tests/tpcdsbench/src/queries/query99.sql new file mode 100644 index 000000000..cb92dd8f1 --- /dev/null +++ b/tests/tpcdsbench/src/queries/query99.sql @@ -0,0 +1,34 @@ +-- start query 99 in stream 0 using template query99.tpl and seed QUALIFICATION + select + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as 30_days + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and + (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as 31_60_days + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and + (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as 61_90_days + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and + (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as 91_120_days + ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as above120_days +from + catalog_sales + ,warehouse + ,ship_mode + ,call_center + ,date_dim +where + d_month_seq between 1200 and 1200 + 11 +and cs_ship_date_sk = d_date_sk +and cs_warehouse_sk = w_warehouse_sk +and cs_ship_mode_sk = sm_ship_mode_sk +and cs_call_center_sk = cc_call_center_sk +group by + substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name +order by substr(w_warehouse_name,1,20) + ,sm_type + ,cc_name + limit 100; +-- end query 99 in stream 0 using template query99.tpl