# Builds the filter clauses for this query by delegating to
# Doi::Search::FilterBuilder.
#
# The builder is handed @options itself rather than a copy, so the
# normalization it performs (a comma-separated String under :ids is replaced
# by an Array) remains visible on @options after this call.
#
# @return [Array<Hash>] the filter clauses returned by FilterBuilder#build
def filters
  Doi::Search::FilterBuilder.new(@options).build
end
# frozen_string_literal: true

class Doi
  module Search
    # Turns a hash of DOI search options into a flat list of filter clauses
    # (+terms+, +range+ and +exists+ hashes).
    #
    # An option only contributes a clause when its value is present;
    # comma-separated strings are split into one term per value. Clauses are
    # emitted in a fixed order (see #build).
    #
    # NOTE(review): presence is decided with +present?+, which treats +false+
    # as blank, so e.g. <tt>link_check_has_schema_org: false</tt> produces no
    # clause. That matches the previous behavior; confirm it is intended.
    class FilterBuilder
      # NOTE(review): doi_from_url and orcid_from_url are called below but not
      # defined in this class; presumably Modelable provides them -- confirm.
      include Modelable

      # "has_*" option => counter field it puts a lower bound on.
      # Insertion order is the order in which the clauses are emitted.
      MINIMUM_COUNT_FIELDS = {
        has_references: :reference_count,
        has_citations: :citation_count,
        has_parts: :part_count,
        has_part_of: :part_of_count,
        has_versions: :version_count,
        has_version_of: :version_of_count,
        has_views: :view_count,
        has_downloads: :download_count
      }.freeze

      # @param options [Hash] search options, read with Symbol keys. The hash
      #   is kept by reference and may be modified by #build.
      def initialize(options)
        @options = options
      end

      # Builds the filter clauses for the options given to the constructor.
      #
      # Side effect: when <tt>options[:ids]</tt> is a String it is replaced, in
      # the caller's hash, by the Array obtained from splitting it on commas.
      #
      # @return [Array<Hash>] zero or more clauses, in this order: identifiers
      #   and types, metadata, counters, landing page, registration, client
      #   and creator, subject schemes
      def build
        normalize_ids!

        [
          identity_filters,
          metadata_filters,
          count_filters,
          landing_page_filters,
          registration_filters,
          client_and_creator_filters,
          subject_scheme_filters
        ].flatten.compact
      end

      private

      # Turns ids into an array if provided as a comma-separated string.
      def normalize_ids!
        ids = @options[:ids]
        @options[:ids] = ids.split(",") if ids.is_a?(String)
      end

      # Yields the option stored under +key+ when it is present and returns the
      # block's result; returns nil otherwise (nils are dropped by #build).
      def given(key)
        value = @options[key]
        yield value if value.present?
      end

      # @param field [Symbol, String] used verbatim as the hash key; callers
      #   pass exactly the key type the clause is expected to carry
      def terms(field, values)
        { terms: { field => values } }
      end

      def exists(field)
        { exists: { field: field } }
      end

      # Range over the smallest and largest of the comma-separated +years+.
      # The "||/y" suffix and "yyyy" format are passed through as-is;
      # presumably date math rounding to the year -- confirm against the
      # search backend.
      def year_range(field, years)
        bounds = years.split(",")
        { range: { field => { gte: "#{bounds.min}||/y", lte: "#{bounds.max}||/y", format: "yyyy" } } }
      end

      def identity_filters
        [
          given(:ids) { |ids| terms(:doi, ids.map(&:upcase)) },
          given(:uid) { |uid| terms(:uid, [uid]) },
          given(:resource_type_id) { |id| terms(:resource_type_id, [id.underscore.dasherize]) },
          given(:resource_type) { |types| terms(:"types.resourceType", types.split(",")) },
          given(:agency) { |agencies| terms(:agency, agencies.split(",").map(&:downcase)) },
          given(:prefix) { |prefixes| terms(:prefix, prefixes.to_s.split(",")) },
          given(:language) { |languages| terms(:language, languages.to_s.split(",").map(&:downcase)) }
        ]
      end

      def metadata_filters
        [
          given(:created) { |years| year_range(:created, years) },
          given(:published) { |years| year_range(:publication_year, years) },
          given(:schema_version) do |version|
            terms(:schema_version, ["http://datacite.org/schema/kernel-#{version}"])
          end,
          given(:subject) { |subjects| terms(:"subjects.subject", subjects.split(",")) },
          given(:license) { |licenses| terms("rights_list.rightsIdentifier", licenses.split(",")) },
          given(:source) { |source| terms(:source, [source]) }
        ]
      end

      def count_filters
        MINIMUM_COUNT_FIELDS.map do |option, field|
          given(option) { |minimum| { range: { field => { gte: minimum.to_i } } } }
        end
      end

      def landing_page_filters
        [
          given(:link_check_status) { |status| terms(:"landing_page.status", [status]) },
          given(:link_checked) { exists("landing_page.checked") },
          given(:link_check_has_schema_org) { |flag| terms(:"landing_page.hasSchemaOrg", [flag]) },
          given(:link_check_body_has_pid) { |flag| terms(:"landing_page.bodyHasPid", [flag]) },
          given(:link_check_found_schema_org_id) { exists("landing_page.schemaOrgId") },
          given(:link_check_found_dc_identifier) { exists("landing_page.dcIdentifier") },
          given(:link_check_found_citation_doi) { exists("landing_page.citationDoi") },
          # Unlike the counters above, this bound is passed through without to_i.
          given(:link_check_redirect_count_gte) do |minimum|
            { range: { "landing_page.redirectCount": { gte: minimum } } }
          end
        ]
      end

      def registration_filters
        [
          given(:state) { |states| terms(:aasm_state, states.to_s.split(",")) },
          given(:registered) { |years| year_range(:registered, years) },
          # to_s keeps a non-String id (e.g. a Symbol) from raising or leaking
          # a non-String term into the clause.
          given(:consortium_id) { |id| terms(:consortium_id, [id.to_s.downcase]) }
        ]
      end

      def client_and_creator_filters
        [
          given(:re3data_id) { |id| terms(:"client.re3data_id", [doi_from_url(id)]) }, # TODO align PID parsing
          given(:opendoar_id) { |id| terms(:"client.opendoar_id", [id]) },
          given(:certificate) { |certificates| terms("client.certificate", certificates.split(",")) },
          given(:user_id) { |ids| terms("creators.nameIdentifiers.nameIdentifier", orcid_urls(ids)) },
          given(:has_person) { terms(:"creators.nameIdentifiers.nameIdentifierScheme", ["ORCID"]) },
          client_type_filters
        ]
      end

      def orcid_urls(ids)
        ids.split(",").map { |id| "https://orcid.org/#{orcid_from_url(id)}" }
      end

      # Blank client types are ignored, like every other option (previously a
      # blank string still produced a clause matching the empty string).
      # "igsnCatalog" additionally restricts results to PhysicalObject.
      def client_type_filters
        given(:client_type) do |type|
          clauses = [terms(:"client.client_type", [type])]
          clauses << terms(:"types.resourceTypeGeneral", ["PhysicalObject"]) if type == "igsnCatalog"
          clauses
        end
      end

      def subject_scheme_filters
        [
          given(:pid_entity) do |entities|
            scheme_subjects("PidEntity", entities.split(",").map(&:humanize))
          end,
          given(:field_of_science) do |fields|
            scheme_subjects(
              "Fields of Science and Technology (FOS)",
              fields.split(",").map { |field| "FOS: #{field.humanize}" }
            )
          end,
          given(:field_of_science_repository) do |fields|
            terms(:fields_of_science_repository, fields.split(",").map(&:humanize))
          end,
          given(:field_of_science_combined) do |fields|
            terms(:fields_of_science_combined, fields.split(",").map(&:humanize))
          end
        ]
      end

      # Pair of clauses: one pinning the subject scheme, one listing subjects.
      def scheme_subjects(scheme, subjects)
        [
          terms(:"subjects.subjectScheme", [scheme]),
          terms(:"subjects.subject", subjects)
        ]
      end
    end
  end
end
["journal-article"] } } ) end @@ -63,7 +61,7 @@ options = { uid: "10.5438/0012" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { uid: "10.5438/0012" } } + { terms: { uid: ["10.5438/0012"] } } ) end @@ -79,7 +77,15 @@ options = { consortium_id: "dc" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { consortium_id: { case_insensitive: true, value: "dc" } } } + { terms: { consortium_id: ["dc"] } } + ) + end + + it "handles registered" do + options = { registered: "2021,2023" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { registered: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } ) end @@ -97,7 +103,7 @@ options = { re3data_id: "10.17616/r31njmjx" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "client.re3data_id" => "10.17616/r31njmjx" } } + { terms: { "client.re3data_id": ["10.17616/r31njmjx"] } } ) end @@ -105,7 +111,15 @@ options = { opendoar_id: "123456" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "client.opendoar_id" => "123456" } } + { terms: { "client.opendoar_id": ["123456"] } } + ) + end + + it "handles certificates" do + options = { certificate: "CoreTrustSeal,WDS" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { "client.certificate" => ["CoreTrustSeal", "WDS"] } } ) end @@ -207,7 +221,7 @@ options = { pid_entity: "dataset,software" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "subjects.subjectScheme": "PidEntity" } }, + { terms: { "subjects.subjectScheme": ["PidEntity"] } }, { terms: { "subjects.subject": ["Dataset", "Software"] } } ) end @@ -216,7 +230,7 @@ options = { field_of_science: "computer_science,mathematics" } builder = described_class.new(query, options) expect(builder.filters).to include( - { 
term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } }, + { terms: { "subjects.subjectScheme": ["Fields of Science and Technology (FOS)"] } }, { terms: { "subjects.subject": ["FOS: Computer science", "FOS: Mathematics"] } } ) end @@ -227,7 +241,7 @@ options = { link_check_status: "200" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "landing_page.status": "200" } } + { terms: { "landing_page.status": ["200"] } } ) end @@ -235,7 +249,7 @@ options = { link_check_has_schema_org: true } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "landing_page.hasSchemaOrg": true } } + { terms: { "landing_page.hasSchemaOrg": [true] } } ) end end