From a0c486ca5f2cc82519d2f04344b9f1c23917544d Mon Sep 17 00:00:00 2001 From: jrhoads Date: Mon, 2 Dec 2024 10:14:53 +0100 Subject: [PATCH 1/4] Standardize; always use 'terms' for filters, not 'term' --- app/models/doi/graphql_query.rb | 30 ++++++++-------- .../doi/graphql_query_builder_filters_spec.rb | 36 +++++++++++++------ 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/app/models/doi/graphql_query.rb b/app/models/doi/graphql_query.rb index 46611b534..391dba466 100644 --- a/app/models/doi/graphql_query.rb +++ b/app/models/doi/graphql_query.rb @@ -125,22 +125,22 @@ def filters filter = [] filter << { terms: { doi: options[:ids].map(&:upcase) } } if options[:ids].present? - filter << { term: { resource_type_id: options[:resource_type_id].underscore.dasherize } } if options[:resource_type_id].present? + filter << { terms: { resource_type_id: [options[:resource_type_id].underscore.dasherize] } } if options[:resource_type_id].present? filter << { terms: { "types.resourceType": options[:resource_type].split(",") } } if options[:resource_type].present? filter << { terms: { agency: options[:agency].split(",").map(&:downcase) } } if options[:agency].present? filter << { terms: { prefix: options[:prefix].to_s.split(",") } } if options[:prefix].present? filter << { terms: { language: options[:language].to_s.split(",").map(&:downcase) } } if options[:language].present? - filter << { term: { uid: options[:uid] } } if options[:uid].present? + filter << { terms: { uid: [options[:uid]] } } if options[:uid].present? filter << { range: { created: { gte: "#{options[:created].split(',').min}||/y", lte: "#{options[:created].split(',').max}||/y", format: "yyyy" } } } if options[:created].present? filter << { range: { publication_year: { gte: "#{options[:published].split(',').min}||/y", lte: "#{options[:published].split(',').max}||/y", format: "yyyy" } } } if options[:published].present? - filter << { term: { schema_version: "http://datacite.org/schema/kernel-#{options[:schema_version]}" } } if options[:schema_version].present? + filter << { terms: { schema_version: ["http://datacite.org/schema/kernel-#{options[:schema_version]}"] } } if options[:schema_version].present? filter << { terms: { "subjects.subject": options[:subject].split(",") } } if options[:subject].present? if options[:pid_entity].present? - filter << { term: { "subjects.subjectScheme": "PidEntity" } } + filter << { terms: { "subjects.subjectScheme": ["PidEntity"] } } filter << { terms: { "subjects.subject": options[:pid_entity].split(",").map(&:humanize) } } end if options[:field_of_science].present? - filter << { term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } } + filter << { terms: { "subjects.subjectScheme": ["Fields of Science and Technology (FOS)"] } } filter << { terms: { "subjects.subject": options[:field_of_science].split(",").map { |s| "FOS: " + s.humanize } } } end if options[:field_of_science_repository].present? @@ -150,7 +150,7 @@ def filters filter << { terms: { "fields_of_science_combined": options[:field_of_science_combined].split(",").map { |s| s.humanize } } } end filter << { terms: { "rights_list.rightsIdentifier" => options[:license].split(",") } } if options[:license].present? - filter << { term: { source: options[:source] } } if options[:source].present? + filter << { terms: { source: [options[:source]] } } if options[:source].present? filter << { range: { reference_count: { "gte": options[:has_references].to_i } } } if options[:has_references].present? filter << { range: { citation_count: { "gte": options[:has_citations].to_i } } } if options[:has_citations].present? filter << { range: { part_count: { "gte": options[:has_parts].to_i } } } if options[:has_parts].present? @@ -159,25 +159,25 @@ def filters filter << { range: { version_of_count: { "gte": options[:has_version_of].to_i } } } if options[:has_version_of].present? filter << { range: { view_count: { "gte": options[:has_views].to_i } } } if options[:has_views].present? filter << { range: { download_count: { "gte": options[:has_downloads].to_i } } } if options[:has_downloads].present? - filter << { term: { "landing_page.status": options[:link_check_status] } } if options[:link_check_status].present? + filter << { terms: { "landing_page.status": [options[:link_check_status]] } } if options[:link_check_status].present? filter << { exists: { field: "landing_page.checked" } } if options[:link_checked].present? - filter << { term: { "landing_page.hasSchemaOrg": options[:link_check_has_schema_org] } } if options[:link_check_has_schema_org].present? - filter << { term: { "landing_page.bodyHasPid": options[:link_check_body_has_pid] } } if options[:link_check_body_has_pid].present? + filter << { terms: { "landing_page.hasSchemaOrg": [options[:link_check_has_schema_org]] } } if options[:link_check_has_schema_org].present? + filter << { terms: { "landing_page.bodyHasPid": [options[:link_check_body_has_pid]] } } if options[:link_check_body_has_pid].present? filter << { exists: { field: "landing_page.schemaOrgId" } } if options[:link_check_found_schema_org_id].present? filter << { exists: { field: "landing_page.dcIdentifier" } } if options[:link_check_found_dc_identifier].present? filter << { exists: { field: "landing_page.citationDoi" } } if options[:link_check_found_citation_doi].present? filter << { range: { "landing_page.redirectCount": { "gte": options[:link_check_redirect_count_gte] } } } if options[:link_check_redirect_count_gte].present? filter << { terms: { aasm_state: options[:state].to_s.split(",") } } if options[:state].present? filter << { range: { registered: { gte: "#{options[:registered].split(',').min}||/y", lte: "#{options[:registered].split(',').max}||/y", format: "yyyy" } } } if options[:registered].present? - filter << { term: { consortium_id: { value: options[:consortium_id], case_insensitive: true } } } if options[:consortium_id].present? + filter << { terms: { consortium_id: [options[:consortium_id].downcase] } } if options[:consortium_id].present? # TODO align PID parsing - filter << { term: { "client.re3data_id" => doi_from_url(options[:re3data_id]) } } if options[:re3data_id].present? - filter << { term: { "client.opendoar_id" => options[:opendoar_id] } } if options[:opendoar_id].present? + filter << { terms: { "client.re3data_id": [doi_from_url(options[:re3data_id])] } } if options[:re3data_id].present? + filter << { terms: { "client.opendoar_id": [options[:opendoar_id]] } } if options[:opendoar_id].present? filter << { terms: { "client.certificate" => options[:certificate].split(",") } } if options[:certificate].present? filter << { terms: { "creators.nameIdentifiers.nameIdentifier" => options[:user_id].split(",").collect { |id| "https://orcid.org/#{orcid_from_url(id)}" } } } if options[:user_id].present? - filter << { term: { "creators.nameIdentifiers.nameIdentifierScheme" => "ORCID" } } if options[:has_person].present? - filter << { term: { "client.client_type" => options[:client_type] } } if options[:client_type] - filter << { term: { "types.resourceTypeGeneral" => "PhysicalObject" } } if options[:client_type] == "igsnCatalog" + filter << { terms: { "creators.nameIdentifiers.nameIdentifierScheme": ["ORCID"] } } if options[:has_person].present? + filter << { terms: { "client.client_type": [options[:client_type]] } } if options[:client_type] + filter << { terms: { "types.resourceTypeGeneral": ["PhysicalObject"] } } if options[:client_type] == "igsnCatalog" filter end diff --git a/spec/models/doi/graphql_query_builder_filters_spec.rb b/spec/models/doi/graphql_query_builder_filters_spec.rb index 261ef7af2..5953846a0 100644 --- a/spec/models/doi/graphql_query_builder_filters_spec.rb +++ b/spec/models/doi/graphql_query_builder_filters_spec.rb @@ -1,5 +1,3 @@ - - # frozen_string_literal: true require "rails_helper" @@ -22,7 +20,7 @@ options = { resource_type_id: "Journal_Article" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { resource_type_id: "journal-article" } } + { terms: { resource_type_id: ["journal-article"] } } ) end @@ -63,7 +61,7 @@ options = { uid: "10.5438/0012" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { uid: "10.5438/0012" } } + { terms: { uid: ["10.5438/0012"] } } ) end @@ -79,7 +77,15 @@ options = { consortium_id: "dc" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { consortium_id: { case_insensitive: true, value: "dc" } } } + { terms: { consortium_id: ["dc"] } } + ) + end + + it "handles registered" do + options = { registered: "2021,2023" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { range: { registered: { gte: "2021||/y", lte: "2023||/y", format: "yyyy" } } } ) end @@ -97,7 +103,7 @@ options = { re3data_id: "10.17616/r31njmjx" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "client.re3data_id" => "10.17616/r31njmjx" } } + { terms: { "client.re3data_id": ["10.17616/r31njmjx"] } } ) end @@ -105,7 +111,15 @@ options = { opendoar_id: "123456" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "client.opendoar_id" => "123456" } } + { terms: { "client.opendoar_id": ["123456"] } } + ) + end + + it "handles certificates" do + options = { certificate: "CoreTrustSeal,WDS" } + builder = described_class.new(query, options) + expect(builder.filters).to include( + { terms: { "client.certificate" => ["CoreTrustSeal", "WDS"] } } ) end @@ -207,7 +221,7 @@ options = { pid_entity: "dataset,software" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "subjects.subjectScheme": "PidEntity" } }, + { terms: { "subjects.subjectScheme": ["PidEntity"] } }, { terms: { "subjects.subject": ["Dataset", "Software"] } } ) end @@ -216,7 +230,7 @@ options = { field_of_science: "computer_science,mathematics" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "subjects.subjectScheme": "Fields of Science and Technology (FOS)" } }, + { terms: { "subjects.subjectScheme": ["Fields of Science and Technology (FOS)"] } }, { terms: { "subjects.subject": ["FOS: Computer science", "FOS: Mathematics"] } } ) end @@ -227,7 +241,7 @@ options = { link_check_status: "200" } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "landing_page.status": "200" } } + { terms: { "landing_page.status": ["200"] } } ) end @@ -235,7 +249,7 @@ options = { link_check_has_schema_org: true } builder = described_class.new(query, options) expect(builder.filters).to include( - { term: { "landing_page.hasSchemaOrg": true } } + { terms: { "landing_page.hasSchemaOrg": [true] } } ) end end From 10c444cd3a138be1924bc0ab39f1b7b6c2237854 Mon Sep 17 00:00:00 2001 From: jrhoads Date: Mon, 9 Dec 2024 22:06:46 +0100 Subject: [PATCH 2/4] Reorder filter terms. Group by type of field --- app/models/doi/graphql_query.rb | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/app/models/doi/graphql_query.rb b/app/models/doi/graphql_query.rb index 391dba466..3780e0f02 100644 --- a/app/models/doi/graphql_query.rb +++ b/app/models/doi/graphql_query.rb @@ -125,30 +125,16 @@ def filters filter = [] filter << { terms: { doi: options[:ids].map(&:upcase) } } if options[:ids].present? + filter << { terms: { uid: [options[:uid]] } } if options[:uid].present? filter << { terms: { resource_type_id: [options[:resource_type_id].underscore.dasherize] } } if options[:resource_type_id].present? filter << { terms: { "types.resourceType": options[:resource_type].split(",") } } if options[:resource_type].present? filter << { terms: { agency: options[:agency].split(",").map(&:downcase) } } if options[:agency].present? filter << { terms: { prefix: options[:prefix].to_s.split(",") } } if options[:prefix].present? filter << { terms: { language: options[:language].to_s.split(",").map(&:downcase) } } if options[:language].present? - filter << { terms: { uid: [options[:uid]] } } if options[:uid].present? filter << { range: { created: { gte: "#{options[:created].split(',').min}||/y", lte: "#{options[:created].split(',').max}||/y", format: "yyyy" } } } if options[:created].present? filter << { range: { publication_year: { gte: "#{options[:published].split(',').min}||/y", lte: "#{options[:published].split(',').max}||/y", format: "yyyy" } } } if options[:published].present? filter << { terms: { schema_version: ["http://datacite.org/schema/kernel-#{options[:schema_version]}"] } } if options[:schema_version].present? filter << { terms: { "subjects.subject": options[:subject].split(",") } } if options[:subject].present? - if options[:pid_entity].present? - filter << { terms: { "subjects.subjectScheme": ["PidEntity"] } } - filter << { terms: { "subjects.subject": options[:pid_entity].split(",").map(&:humanize) } } - end - if options[:field_of_science].present? - filter << { terms: { "subjects.subjectScheme": ["Fields of Science and Technology (FOS)"] } } - filter << { terms: { "subjects.subject": options[:field_of_science].split(",").map { |s| "FOS: " + s.humanize } } } - end - if options[:field_of_science_repository].present? - filter << { terms: { "fields_of_science_repository": options[:field_of_science_repository].split(",").map { |s| s.humanize } } } - end - if options[:field_of_science_combined].present? - filter << { terms: { "fields_of_science_combined": options[:field_of_science_combined].split(",").map { |s| s.humanize } } } - end filter << { terms: { "rights_list.rightsIdentifier" => options[:license].split(",") } } if options[:license].present? filter << { terms: { source: [options[:source]] } } if options[:source].present? filter << { range: { reference_count: { "gte": options[:has_references].to_i } } } if options[:has_references].present? @@ -170,14 +156,27 @@ def filters filter << { terms: { aasm_state: options[:state].to_s.split(",") } } if options[:state].present? filter << { range: { registered: { gte: "#{options[:registered].split(',').min}||/y", lte: "#{options[:registered].split(',').max}||/y", format: "yyyy" } } } if options[:registered].present? filter << { terms: { consortium_id: [options[:consortium_id].downcase] } } if options[:consortium_id].present? - # TODO align PID parsing - filter << { terms: { "client.re3data_id": [doi_from_url(options[:re3data_id])] } } if options[:re3data_id].present? + filter << { terms: { "client.re3data_id": [doi_from_url(options[:re3data_id])] } } if options[:re3data_id].present? # TODO align PID parsing filter << { terms: { "client.opendoar_id": [options[:opendoar_id]] } } if options[:opendoar_id].present? filter << { terms: { "client.certificate" => options[:certificate].split(",") } } if options[:certificate].present? filter << { terms: { "creators.nameIdentifiers.nameIdentifier" => options[:user_id].split(",").collect { |id| "https://orcid.org/#{orcid_from_url(id)}" } } } if options[:user_id].present? filter << { terms: { "creators.nameIdentifiers.nameIdentifierScheme": ["ORCID"] } } if options[:has_person].present? filter << { terms: { "client.client_type": [options[:client_type]] } } if options[:client_type] filter << { terms: { "types.resourceTypeGeneral": ["PhysicalObject"] } } if options[:client_type] == "igsnCatalog" + if options[:pid_entity].present? + filter << { terms: { "subjects.subjectScheme": ["PidEntity"] } } + filter << { terms: { "subjects.subject": options[:pid_entity].split(",").map(&:humanize) } } + end + if options[:field_of_science].present? + filter << { terms: { "subjects.subjectScheme": ["Fields of Science and Technology (FOS)"] } } + filter << { terms: { "subjects.subject": options[:field_of_science].split(",").map { |s| "FOS: " + s.humanize } } } + end + if options[:field_of_science_repository].present? + filter << { terms: { "fields_of_science_repository": options[:field_of_science_repository].split(",").map { |s| s.humanize } } } + end + if options[:field_of_science_combined].present? + filter << { terms: { "fields_of_science_combined": options[:field_of_science_combined].split(",").map { |s| s.humanize } } } + end filter end From 948ea00164117c8a0bc38933a94b7e4820d39d3f Mon Sep 17 00:00:00 2001 From: jrhoads Date: Thu, 12 Dec 2024 15:44:48 +0100 Subject: [PATCH 3/4] Refactor filter builder into its own class. Call that from the graphql query builder --- app/models/doi/graphql_query.rb | 62 +---------------- app/models/doi/search/filter_builder.rb | 91 +++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 61 deletions(-) create mode 100644 app/models/doi/search/filter_builder.rb diff --git a/app/models/doi/graphql_query.rb b/app/models/doi/graphql_query.rb index 3780e0f02..571fce420 100644 --- a/app/models/doi/graphql_query.rb +++ b/app/models/doi/graphql_query.rb @@ -118,67 +118,7 @@ def must end def filters - options = @options - - # turn ids into an array if provided as comma-separated string - options[:ids] = options[:ids].split(",") if options[:ids].is_a?(String) - - filter = [] - filter << { terms: { doi: options[:ids].map(&:upcase) } } if options[:ids].present? - filter << { terms: { uid: [options[:uid]] } } if options[:uid].present? - filter << { terms: { resource_type_id: [options[:resource_type_id].underscore.dasherize] } } if options[:resource_type_id].present? - filter << { terms: { "types.resourceType": options[:resource_type].split(",") } } if options[:resource_type].present? - filter << { terms: { agency: options[:agency].split(",").map(&:downcase) } } if options[:agency].present? - filter << { terms: { prefix: options[:prefix].to_s.split(",") } } if options[:prefix].present? - filter << { terms: { language: options[:language].to_s.split(",").map(&:downcase) } } if options[:language].present? - filter << { range: { created: { gte: "#{options[:created].split(',').min}||/y", lte: "#{options[:created].split(',').max}||/y", format: "yyyy" } } } if options[:created].present? - filter << { range: { publication_year: { gte: "#{options[:published].split(',').min}||/y", lte: "#{options[:published].split(',').max}||/y", format: "yyyy" } } } if options[:published].present? - filter << { terms: { schema_version: ["http://datacite.org/schema/kernel-#{options[:schema_version]}"] } } if options[:schema_version].present? - filter << { terms: { "subjects.subject": options[:subject].split(",") } } if options[:subject].present? - filter << { terms: { "rights_list.rightsIdentifier" => options[:license].split(",") } } if options[:license].present? - filter << { terms: { source: [options[:source]] } } if options[:source].present? - filter << { range: { reference_count: { "gte": options[:has_references].to_i } } } if options[:has_references].present? - filter << { range: { citation_count: { "gte": options[:has_citations].to_i } } } if options[:has_citations].present? - filter << { range: { part_count: { "gte": options[:has_parts].to_i } } } if options[:has_parts].present? - filter << { range: { part_of_count: { "gte": options[:has_part_of].to_i } } } if options[:has_part_of].present? - filter << { range: { version_count: { "gte": options[:has_versions].to_i } } } if options[:has_versions].present? - filter << { range: { version_of_count: { "gte": options[:has_version_of].to_i } } } if options[:has_version_of].present? - filter << { range: { view_count: { "gte": options[:has_views].to_i } } } if options[:has_views].present? - filter << { range: { download_count: { "gte": options[:has_downloads].to_i } } } if options[:has_downloads].present? - filter << { terms: { "landing_page.status": [options[:link_check_status]] } } if options[:link_check_status].present? - filter << { exists: { field: "landing_page.checked" } } if options[:link_checked].present? - filter << { terms: { "landing_page.hasSchemaOrg": [options[:link_check_has_schema_org]] } } if options[:link_check_has_schema_org].present? - filter << { terms: { "landing_page.bodyHasPid": [options[:link_check_body_has_pid]] } } if options[:link_check_body_has_pid].present? - filter << { exists: { field: "landing_page.schemaOrgId" } } if options[:link_check_found_schema_org_id].present? - filter << { exists: { field: "landing_page.dcIdentifier" } } if options[:link_check_found_dc_identifier].present? - filter << { exists: { field: "landing_page.citationDoi" } } if options[:link_check_found_citation_doi].present? - filter << { range: { "landing_page.redirectCount": { "gte": options[:link_check_redirect_count_gte] } } } if options[:link_check_redirect_count_gte].present? - filter << { terms: { aasm_state: options[:state].to_s.split(",") } } if options[:state].present? - filter << { range: { registered: { gte: "#{options[:registered].split(',').min}||/y", lte: "#{options[:registered].split(',').max}||/y", format: "yyyy" } } } if options[:registered].present? - filter << { terms: { consortium_id: [options[:consortium_id].downcase] } } if options[:consortium_id].present? - filter << { terms: { "client.re3data_id": [doi_from_url(options[:re3data_id])] } } if options[:re3data_id].present? # TODO align PID parsing - filter << { terms: { "client.opendoar_id": [options[:opendoar_id]] } } if options[:opendoar_id].present? - filter << { terms: { "client.certificate" => options[:certificate].split(",") } } if options[:certificate].present? - filter << { terms: { "creators.nameIdentifiers.nameIdentifier" => options[:user_id].split(",").collect { |id| "https://orcid.org/#{orcid_from_url(id)}" } } } if options[:user_id].present? - filter << { terms: { "creators.nameIdentifiers.nameIdentifierScheme": ["ORCID"] } } if options[:has_person].present? - filter << { terms: { "client.client_type": [options[:client_type]] } } if options[:client_type] - filter << { terms: { "types.resourceTypeGeneral": ["PhysicalObject"] } } if options[:client_type] == "igsnCatalog" - if options[:pid_entity].present? - filter << { terms: { "subjects.subjectScheme": ["PidEntity"] } } - filter << { terms: { "subjects.subject": options[:pid_entity].split(",").map(&:humanize) } } - end - if options[:field_of_science].present? - filter << { terms: { "subjects.subjectScheme": ["Fields of Science and Technology (FOS)"] } } - filter << { terms: { "subjects.subject": options[:field_of_science].split(",").map { |s| "FOS: " + s.humanize } } } - end - if options[:field_of_science_repository].present? - filter << { terms: { "fields_of_science_repository": options[:field_of_science_repository].split(",").map { |s| s.humanize } } } - end - if options[:field_of_science_combined].present? - filter << { terms: { "fields_of_science_combined": options[:field_of_science_combined].split(",").map { |s| s.humanize } } } - end - - filter + Doi::Search::FilterBuilder.new(@options).build end def get_should_clause diff --git a/app/models/doi/search/filter_builder.rb b/app/models/doi/search/filter_builder.rb new file mode 100644 index 000000000..f1e0955f1 --- /dev/null +++ b/app/models/doi/search/filter_builder.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +class Doi + module Search + class FilterBuilder + include Modelable + + def initialize(options) + @options = options + end + + def build + options = @options + + # turn ids into an array if provided as comma-separated string + options[:ids] = options[:ids].split(",") if options[:ids].is_a?(String) + + filter = [] + filter << { terms: { doi: options[:ids].map(&:upcase) } } if options[:ids].present? + filter << { terms: { uid: [options[:uid]] } } if options[:uid].present? + filter << { terms: { resource_type_id: [options[:resource_type_id].underscore.dasherize] } } if options[:resource_type_id].present? + filter << { terms: { "types.resourceType": options[:resource_type].split(",") } } if options[:resource_type].present? + filter << { terms: { agency: options[:agency].split(",").map(&:downcase) } } if options[:agency].present? + filter << { terms: { prefix: options[:prefix].to_s.split(",") } } if options[:prefix].present? + filter << { terms: { language: options[:language].to_s.split(",").map(&:downcase) } } if options[:language].present? + filter << { range: { created: { gte: "#{options[:created].split(',').min}||/y", lte: "#{options[:created].split(',').max}||/y", format: "yyyy" } } } if options[:created].present? + filter << { range: { publication_year: { gte: "#{options[:published].split(',').min}||/y", lte: "#{options[:published].split(',').max}||/y", format: "yyyy" } } } if options[:published].present? + filter << { terms: { schema_version: ["http://datacite.org/schema/kernel-#{options[:schema_version]}"] } } if options[:schema_version].present? + filter << { terms: { "subjects.subject": options[:subject].split(",") } } if options[:subject].present? + filter << { terms: { "rights_list.rightsIdentifier" => options[:license].split(",") } } if options[:license].present? + filter << { terms: { source: [options[:source]] } } if options[:source].present? + filter << { range: { reference_count: { "gte": options[:has_references].to_i } } } if options[:has_references].present? + filter << { range: { citation_count: { "gte": options[:has_citations].to_i } } } if options[:has_citations].present? + filter << { range: { part_count: { "gte": options[:has_parts].to_i } } } if options[:has_parts].present? + filter << { range: { part_of_count: { "gte": options[:has_part_of].to_i } } } if options[:has_part_of].present? + filter << { range: { version_count: { "gte": options[:has_versions].to_i } } } if options[:has_versions].present? + filter << { range: { version_of_count: { "gte": options[:has_version_of].to_i } } } if options[:has_version_of].present? + filter << { range: { view_count: { "gte": options[:has_views].to_i } } } if options[:has_views].present? + filter << { range: { download_count: { "gte": options[:has_downloads].to_i } } } if options[:has_downloads].present? + filter << { terms: { "landing_page.status": [options[:link_check_status]] } } if options[:link_check_status].present? + filter << { exists: { field: "landing_page.checked" } } if options[:link_checked].present? + filter << { terms: { "landing_page.hasSchemaOrg": [options[:link_check_has_schema_org]] } } if options[:link_check_has_schema_org].present? + filter << { terms: { "landing_page.bodyHasPid": [options[:link_check_body_has_pid]] } } if options[:link_check_body_has_pid].present? + filter << { exists: { field: "landing_page.schemaOrgId" } } if options[:link_check_found_schema_org_id].present? + filter << { exists: { field: "landing_page.dcIdentifier" } } if options[:link_check_found_dc_identifier].present? + filter << { exists: { field: "landing_page.citationDoi" } } if options[:link_check_found_citation_doi].present? + filter << { range: { "landing_page.redirectCount": { "gte": options[:link_check_redirect_count_gte] } } } if options[:link_check_redirect_count_gte].present? + filter << { terms: { aasm_state: options[:state].to_s.split(",") } } if options[:state].present? + filter << { range: { registered: { gte: "#{options[:registered].split(',').min}||/y", lte: "#{options[:registered].split(',').max}||/y", format: "yyyy" } } } if options[:registered].present? + filter << { terms: { consortium_id: [options[:consortium_id].downcase] } } if options[:consortium_id].present? + filter << { terms: { "client.re3data_id": [doi_from_url(options[:re3data_id])] } } if options[:re3data_id].present? # TODO align PID parsing + filter << { terms: { "client.opendoar_id": [options[:opendoar_id]] } } if options[:opendoar_id].present? + filter << { terms: { "client.certificate" => options[:certificate].split(",") } } if options[:certificate].present? + filter << { terms: { "creators.nameIdentifiers.nameIdentifier" => options[:user_id].split(",").collect { |id| "https://orcid.org/#{orcid_from_url(id)}" } } } if options[:user_id].present? + filter << { terms: { "creators.nameIdentifiers.nameIdentifierScheme": ["ORCID"] } } if options[:has_person].present? + filter << { terms: { "client.client_type": [options[:client_type]] } } if options[:client_type] + filter << { terms: { "types.resourceTypeGeneral": ["PhysicalObject"] } } if options[:client_type] == "igsnCatalog" + filter.push(*build_pid_entity_filter) if options[:pid_entity].present? + filter.push(*build_field_of_science_filter) if options[:field_of_science].present? + filter << build_field_of_science_repository_filter if options[:field_of_science_repository].present? + filter << build_field_of_science_combined_filter if options[:field_of_science_combined].present? + + filter + end + + private + + def build_pid_entity_filter + [ + { terms: { "subjects.subjectScheme": ["PidEntity"] } }, + { terms: { "subjects.subject": @options[:pid_entity].split(",").map(&:humanize) } } + ] + end + + def build_field_of_science_filter + [ + { terms: { "subjects.subjectScheme": ["Fields of Science and Technology (FOS)"] } }, + { terms: { "subjects.subject": @options[:field_of_science].split(",").map { |s| "FOS: " + s.humanize } } } + ] + end + + def build_field_of_science_repository_filter + { terms: { "fields_of_science_repository": @options[:field_of_science_repository].split(",").map { |s| s.humanize } } } + end + + def build_field_of_science_combined_filter + { terms: { "fields_of_science_combined": @options[:field_of_science_combined].split(",").map { |s| s.humanize } } } + end + end + end +end From 24537887a543589a57aa729b484ab61d46a4f30b Mon Sep 17 00:00:00 2001 From: jrhoads Date: Thu, 12 Dec 2024 15:46:33 +0100 Subject: [PATCH 4/4] Appease Rubocop --- app/models/doi/search/filter_builder.rb | 37 ++++++++++++------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/app/models/doi/search/filter_builder.rb b/app/models/doi/search/filter_builder.rb index f1e0955f1..76c9ab3f4 100644 --- a/app/models/doi/search/filter_builder.rb +++ b/app/models/doi/search/filter_builder.rb @@ -64,28 +64,27 @@ def build end private + def build_pid_entity_filter + [ + { terms: { "subjects.subjectScheme": ["PidEntity"] } }, + { terms: { "subjects.subject": @options[:pid_entity].split(",").map(&:humanize) } } + ] + end - def build_pid_entity_filter - [ - { terms: { "subjects.subjectScheme": ["PidEntity"] } }, - { terms: { "subjects.subject": @options[:pid_entity].split(",").map(&:humanize) } } - ] - end + def build_field_of_science_filter + [ + { terms: { "subjects.subjectScheme": ["Fields of Science and Technology (FOS)"] } }, + { terms: { "subjects.subject": @options[:field_of_science].split(",").map { |s| "FOS: " + s.humanize } } } + ] + end - def build_field_of_science_filter - [ - { terms: { "subjects.subjectScheme": ["Fields of Science and Technology (FOS)"] } }, - { terms: { "subjects.subject": @options[:field_of_science].split(",").map { |s| "FOS: " + s.humanize } } } - ] - end + def build_field_of_science_repository_filter + { terms: { "fields_of_science_repository": @options[:field_of_science_repository].split(",").map { |s| s.humanize } } } + end - def build_field_of_science_repository_filter - { terms: { "fields_of_science_repository": @options[:field_of_science_repository].split(",").map { |s| s.humanize } } } - end - - def build_field_of_science_combined_filter - { terms: { "fields_of_science_combined": @options[:field_of_science_combined].split(",").map { |s| s.humanize } } } - end + def build_field_of_science_combined_filter + { terms: { "fields_of_science_combined": @options[:field_of_science_combined].split(",").map { |s| s.humanize } } } + end end end end