From d1d36cabd491d8650ae90cef682059874405167b Mon Sep 17 00:00:00 2001 From: Mair Date: Sun, 2 Mar 2025 15:47:23 +0400 Subject: [PATCH 1/4] init --- .gitignore | 2 + .ruby-version | 2 +- Gemfile | 9 ++- Gemfile.lock | 24 +++++++- Readme.md | 2 +- case-study.md | 58 +++++++++++++++++++ config/routes.rb | 1 + ...0250227170310_create_pghero_query_stats.rb | 15 +++++ ...0250227171011_create_pghero_space_stats.rb | 13 +++++ db/schema.rb | 37 +++++++++--- 10 files changed, 150 insertions(+), 13 deletions(-) create mode 100644 case-study.md create mode 100644 db/migrate/20250227170310_create_pghero_query_stats.rb create mode 100644 db/migrate/20250227171011_create_pghero_space_stats.rb diff --git a/.gitignore b/.gitignore index 59c74047..16d618bc 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ /tmp /log /public +/profile_reports +/.idea diff --git a/.ruby-version b/.ruby-version index 47b322c9..bea438e9 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -3.4.1 +3.3.1 diff --git a/Gemfile b/Gemfile index 34074dfd..2f1c8acd 100644 --- a/Gemfile +++ b/Gemfile @@ -9,6 +9,13 @@ gem 'puma' gem 'listen' gem 'bootsnap' gem 'rack-mini-profiler' - +gem 'sprockets-rails', :require => 'sprockets/railtie' # Windows does not include zoneinfo files, so bundle the tzinfo-data gem gem 'tzinfo-data', platforms: [:mingw, :mswin, :x64_mingw, :jruby] + +group :development do + gem 'pghero' + gem "pg_query", ">= 2" + gem "ruby-prof" + gem "memory_profiler" +end diff --git a/Gemfile.lock b/Gemfile.lock index a9ddd818..4a9528ce 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -87,6 +87,9 @@ GEM ffi (1.17.1-arm64-darwin) globalid (1.2.1) activesupport (>= 6.1) + google-protobuf (4.29.3-arm64-darwin) + bigdecimal + rake (>= 13) i18n (1.14.7) concurrent-ruby (~> 1.0) io-console (0.8.0) @@ -107,6 +110,7 @@ GEM net-pop net-smtp marcel (1.0.4) + memory_profiler (1.1.0) mini_mime (1.1.5) minitest (5.25.4) msgpack (1.8.0) @@ -123,6 +127,10 @@ GEM nokogiri (1.18.2-arm64-darwin) racc (~> 
1.4) pg (1.5.9) + pg_query (6.0.0) + google-protobuf (>= 3.25.3) + pghero (3.6.1) + activerecord (>= 6.1) pp (0.6.2) prettyprint prettyprint (0.2.0) @@ -179,7 +187,15 @@ GEM psych (>= 4.0.0) reline (0.6.0) io-console (~> 0.5) + ruby-prof (1.7.1) securerandom (0.4.1) + sprockets (4.2.1) + concurrent-ruby (~> 1.0) + rack (>= 2.2.4, < 4) + sprockets-rails (3.5.2) + actionpack (>= 6.1) + activesupport (>= 6.1) + sprockets (>= 3.0.0) stringio (3.1.2) thor (1.3.2) timeout (0.4.3) @@ -194,19 +210,25 @@ GEM zeitwerk (2.7.1) PLATFORMS + arm64-darwin-23 arm64-darwin-24 DEPENDENCIES bootsnap listen + memory_profiler pg + pg_query (>= 2) + pghero puma rack-mini-profiler rails (~> 8.0.1) + ruby-prof + sprockets-rails tzinfo-data RUBY VERSION - ruby 3.4.1p0 + ruby 3.3.1p55 BUNDLED WITH 2.6.2 diff --git a/Readme.md b/Readme.md index 23864b3e..4f62730e 100644 --- a/Readme.md +++ b/Readme.md @@ -30,7 +30,7 @@ Нужно оптимизировать механизм перезагрузки расписания из файла так, чтобы он импортировал файл `large.json` **в пределах минуты**. -`rake reload_json[fixtures/large.json]` +`rake "reload_json[fixtures/large.json]"` Для импорта этого объёма данных - вам может помочь batch-import diff --git a/case-study.md b/case-study.md new file mode 100644 index 00000000..28eee554 --- /dev/null +++ b/case-study.md @@ -0,0 +1,58 @@ +# Актуальная проблема +## В проекте возникли две ключевые проблемы производительности: + +Медленный импорт данных из JSON файлов в базу данных. Текущая наивная реализация не способна обработать large.json (100K записей) за приемлемое время. + +Неэффективное отображение расписания автобусов на странице, что приводит к значительным задержкам при увеличении объема данных. 
+ +## Формирование метрик +Для оценки эффективности оптимизации определены следующие метрики: + +Для импорта данных: +Время выполнения импорта large.json (целевое значение - менее 1 минуты) +Потребление памяти во время импорта +CPU usage + +Для отображения расписания: +Время рендеринга страницы +Количество SQL запросов +Время выполнения SQL запросов + +## Гарантия корректности +Для обеспечения корректности оптимизации: + +Написан интеграционный тест, проверяющий идентичность отображения данных из example.json до и после оптимизации +Используются стандартные Rails-тесты для проверки моделей и контроллеров +Сравнение результатов рендеринга страницы до и после оптимизации + +## Feedback-Loop +Построен быстрый цикл обратной связи: + +Для импорта: +Подготовлены тестовые JSON файлы разного размера (small.json, medium.jsom, large.json) +pg_hero + +Для отображения: +rack-mini-profiler для профилирования рендеринга +pg_hero + +## Поиск точек роста +Использованы инструменты профилирования: + +Для импорта: +ruby-prof для анализа hot spots +memory_profiler для отслеживания утечек памяти +stackprof для профилирования CPU + +Для отображения: +rack-mini-profiler +bullet для выявления N+1 запросов +pghero для анализа производительности PostgreSQL + +## Результаты оптимизации +(будут заполнены после выполнения оптимизации): + +Время импорта large.json: X секунд +Время рендеринга страницы Самара-Москва: Y мс +Количество SQL запросов: уменьшено с Z до W +Этот case-study будет дополняться конкретными цифрами и деталями реализации по мере выполнения оптимизации. diff --git a/config/routes.rb b/config/routes.rb index 0bbefa7a..61c99df7 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -1,4 +1,5 @@ Rails.application.routes.draw do # For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html get "автобусы/:from/:to" => "trips#index" + mount PgHero::Engine, at: "pghero" if Rails.env.development? 
end diff --git a/db/migrate/20250227170310_create_pghero_query_stats.rb b/db/migrate/20250227170310_create_pghero_query_stats.rb new file mode 100644 index 00000000..74aaaa9a --- /dev/null +++ b/db/migrate/20250227170310_create_pghero_query_stats.rb @@ -0,0 +1,15 @@ +class CreatePgheroQueryStats < ActiveRecord::Migration[8.0] + def change + create_table :pghero_query_stats do |t| + t.text :database + t.text :user + t.text :query + t.integer :query_hash, limit: 8 + t.float :total_time + t.integer :calls, limit: 8 + t.timestamp :captured_at + end + + add_index :pghero_query_stats, [:database, :captured_at] + end +end diff --git a/db/migrate/20250227171011_create_pghero_space_stats.rb b/db/migrate/20250227171011_create_pghero_space_stats.rb new file mode 100644 index 00000000..5592db37 --- /dev/null +++ b/db/migrate/20250227171011_create_pghero_space_stats.rb @@ -0,0 +1,13 @@ +class CreatePgheroSpaceStats < ActiveRecord::Migration[8.0] + def change + create_table :pghero_space_stats do |t| + t.text :database + t.text :schema + t.text :relation + t.integer :size, limit: 8 + t.timestamp :captured_at + end + + add_index :pghero_space_stats, [:database, :captured_at] + end +end diff --git a/db/schema.rb b/db/schema.rb index f6921e45..9229b14f 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -2,18 +2,18 @@ # of editing this file, please use the migrations feature of Active Record to # incrementally modify your database, and then regenerate this schema definition. # -# Note that this schema.rb definition is the authoritative source for your -# database schema. If you need to create the application database on another -# system, you should be using db:schema:load, not running all the migrations -# from scratch. The latter is a flawed and unsustainable approach (the more migrations -# you'll amass, the slower it'll run and the greater likelihood for issues). +# This file is the source Rails uses to define your schema when running `bin/rails +# db:schema:load`. 
When creating a new database, `bin/rails db:schema:load` tends to +# be faster and is potentially less error prone than running all of your +# migrations from scratch. Old migrations may fail to apply correctly if those +# migrations use external dependencies or application code. # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2019_03_30_193044) do - +ActiveRecord::Schema[8.0].define(version: 2025_02_27_171011) do # These are extensions that must be enabled in order to support this database - enable_extension "plpgsql" + enable_extension "pg_catalog.plpgsql" + enable_extension "pg_stat_statements" create_table "buses", force: :cascade do |t| t.string "number" @@ -29,6 +29,26 @@ t.string "name" end + create_table "pghero_query_stats", force: :cascade do |t| + t.text "database" + t.text "user" + t.text "query" + t.bigint "query_hash" + t.float "total_time" + t.bigint "calls" + t.datetime "captured_at", precision: nil + t.index ["database", "captured_at"], name: "index_pghero_query_stats_on_database_and_captured_at" + end + + create_table "pghero_space_stats", force: :cascade do |t| + t.text "database" + t.text "schema" + t.text "relation" + t.bigint "size" + t.datetime "captured_at", precision: nil + t.index ["database", "captured_at"], name: "index_pghero_space_stats_on_database_and_captured_at" + end + create_table "services", force: :cascade do |t| t.string "name" end @@ -41,5 +61,4 @@ t.integer "price_cents" t.integer "bus_id" end - end From dd57c6e98a9ee173b09fb0be28449c6876ac6a70 Mon Sep 17 00:00:00 2001 From: Mair Date: Sun, 2 Mar 2025 15:55:52 +0400 Subject: [PATCH 2/4] added rspec --- .rspec | 1 + Gemfile | 4 + Gemfile.lock | 19 ++++ app/services/reloader.rb | 141 +++++++++++++++++++++++++++ lib/tasks/utils.rake | 32 +----- spec/rails_helper.rb | 68 +++++++++++++ spec/services/reloader_spec.rb | 35 +++++++ spec/spec_helper.rb | 94 ++++++++++++++++++ 
test/application_system_test_case.rb | 5 - test/controllers/.keep | 0 test/fixtures/.keep | 0 test/fixtures/files/.keep | 0 test/helpers/.keep | 0 test/integration/.keep | 0 test/mailers/.keep | 0 test/models/.keep | 0 test/system/.keep | 0 test/test_helper.rb | 10 -- 18 files changed, 364 insertions(+), 45 deletions(-) create mode 100644 .rspec create mode 100644 app/services/reloader.rb create mode 100644 spec/rails_helper.rb create mode 100644 spec/services/reloader_spec.rb create mode 100644 spec/spec_helper.rb delete mode 100644 test/application_system_test_case.rb delete mode 100644 test/controllers/.keep delete mode 100644 test/fixtures/.keep delete mode 100644 test/fixtures/files/.keep delete mode 100644 test/helpers/.keep delete mode 100644 test/integration/.keep delete mode 100644 test/mailers/.keep delete mode 100644 test/models/.keep delete mode 100644 test/system/.keep delete mode 100644 test/test_helper.rb diff --git a/.rspec b/.rspec new file mode 100644 index 00000000..c99d2e73 --- /dev/null +++ b/.rspec @@ -0,0 +1 @@ +--require spec_helper diff --git a/Gemfile b/Gemfile index 2f1c8acd..e890b965 100644 --- a/Gemfile +++ b/Gemfile @@ -19,3 +19,7 @@ group :development do gem "ruby-prof" gem "memory_profiler" end + +group :development, :test do + gem 'rspec-rails', '~> 7.0.0' +end diff --git a/Gemfile.lock b/Gemfile.lock index 4a9528ce..87161a05 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -82,6 +82,7 @@ GEM connection_pool (2.5.0) crass (1.0.6) date (3.4.1) + diff-lcs (1.6.0) drb (2.2.1) erubi (1.13.1) ffi (1.17.1-arm64-darwin) @@ -187,6 +188,23 @@ GEM psych (>= 4.0.0) reline (0.6.0) io-console (~> 0.5) + rspec-core (3.13.3) + rspec-support (~> 3.13.0) + rspec-expectations (3.13.3) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-mocks (3.13.2) + diff-lcs (>= 1.2.0, < 2.0) + rspec-support (~> 3.13.0) + rspec-rails (7.0.2) + actionpack (>= 7.0) + activesupport (>= 7.0) + railties (>= 7.0) + rspec-core (~> 3.13) + rspec-expectations 
(~> 3.13) + rspec-mocks (~> 3.13) + rspec-support (~> 3.13) + rspec-support (3.13.2) ruby-prof (1.7.1) securerandom (0.4.1) sprockets (4.2.1) @@ -223,6 +241,7 @@ DEPENDENCIES puma rack-mini-profiler rails (~> 8.0.1) + rspec-rails (~> 7.0.0) ruby-prof sprockets-rails tzinfo-data diff --git a/app/services/reloader.rb b/app/services/reloader.rb new file mode 100644 index 00000000..69363508 --- /dev/null +++ b/app/services/reloader.rb @@ -0,0 +1,141 @@ +class Reloader + def self.call(file_name) + new(file_name).call + end + + def initialize(file_name) + @file_name = file_name + @json = JSON.parse(File.read(file_name)) + end + + def call + ActiveRecord::Base.transaction do + clear_tables + collect_and_create_data + end + end + + private + + attr_reader :file_name, :json + + def clear_tables + City.delete_all + Bus.delete_all + Service.delete_all + Trip.delete_all + ActiveRecord::Base.connection.execute('DELETE FROM buses_services') + end + + def collect_and_create_data + collected_data = collect_data + created_records = create_main_records(collected_data) + create_associated_records(created_records) + end + + def collect_data + { + cities: Set.new, + buses: [], + services: Set.new + }.tap do |data| + json.each do |trip| + data[:cities] << trip['from'] + data[:cities] << trip['to'] + data[:services].merge(trip['bus']['services']) + data[:buses] << { + number: trip['bus']['number'], + model: trip['bus']['model'] + } + end + end + end + + def create_main_records(collected_data) + { + cities: create_cities(collected_data[:cities]), + services: create_services(collected_data[:services]), + buses: create_buses(collected_data[:buses]) + } + end + + def create_cities(cities) + City.insert_all( + cities.map { |name| { name: name } }, + returning: [:id, :name] + ).index_by { |city| city['name'] } + end + + def create_services(services) + Service.insert_all( + services.map { |name| { name: name } }, + returning: [:id, :name] + ).index_by { |service| service['name'] } + end + + def 
create_buses(buses) + Bus.insert_all( + buses.uniq { |bus| bus[:number] }, + returning: [:id, :number] + ).index_by { |bus| bus['number'] } + end + + def create_associated_records(created_records) + buses_services_data = [] + trips_data = [] + + json.each do |trip| + bus_id = created_records[:buses][trip['bus']['number']]['id'] + + collect_buses_services_data( + buses_services_data, + bus_id, + trip['bus']['services'], + created_records[:services] + ) + + collect_trips_data( + trips_data, + trip, + bus_id, + created_records[:cities] + ) + end + + create_buses_services(buses_services_data) + create_trips(trips_data) + end + + def collect_buses_services_data(data, bus_id, services, services_records) + services.each do |service_name| + data << { + bus_id: bus_id, + service_id: services_records[service_name]['id'] + } + end + end + + def collect_trips_data(data, trip, bus_id, cities) + data << { + from_id: cities[trip['from']]['id'], + to_id: cities[trip['to']]['id'], + bus_id: bus_id, + start_time: trip['start_time'], + duration_minutes: trip['duration_minutes'], + price_cents: trip['price_cents'] + } + end + + def create_buses_services(data) + return if data.empty? 
+ + values = data.uniq.map { |r| "(#{r[:bus_id]}, #{r[:service_id]})" }.join(', ') + ActiveRecord::Base.connection.execute( + "INSERT INTO buses_services (bus_id, service_id) VALUES #{values}" + ) + end + + def create_trips(data) + Trip.insert_all!(data) + end +end diff --git a/lib/tasks/utils.rake b/lib/tasks/utils.rake index 540fe871..19510975 100644 --- a/lib/tasks/utils.rake +++ b/lib/tasks/utils.rake @@ -1,34 +1,6 @@ # Наивная загрузка данных из json-файла в БД # rake reload_json[fixtures/small.json] task :reload_json, [:file_name] => :environment do |_task, args| - json = JSON.parse(File.read(args.file_name)) - - ActiveRecord::Base.transaction do - City.delete_all - Bus.delete_all - Service.delete_all - Trip.delete_all - ActiveRecord::Base.connection.execute('delete from buses_services;') - - json.each do |trip| - from = City.find_or_create_by(name: trip['from']) - to = City.find_or_create_by(name: trip['to']) - services = [] - trip['bus']['services'].each do |service| - s = Service.find_or_create_by(name: service) - services << s - end - bus = Bus.find_or_create_by(number: trip['bus']['number']) - bus.update(model: trip['bus']['model'], services: services) - - Trip.create!( - from: from, - to: to, - bus: bus, - start_time: trip['start_time'], - duration_minutes: trip['duration_minutes'], - price_cents: trip['price_cents'], - ) - end - end + Reloader.call(args.file_name) end + diff --git a/spec/rails_helper.rb b/spec/rails_helper.rb new file mode 100644 index 00000000..0cd9a904 --- /dev/null +++ b/spec/rails_helper.rb @@ -0,0 +1,68 @@ +# This file is copied to spec/ when you run 'rails generate rspec:install' +require 'spec_helper' +ENV['RAILS_ENV'] ||= 'test' +require_relative '../config/environment' +# Prevent database truncation if the environment is production +abort("The Rails environment is running in production mode!") if Rails.env.production? 
+# Uncomment the line below in case you have `--require rails_helper` in the `.rspec` file +# that will avoid rails generators crashing because migrations haven't been run yet +# return unless Rails.env.test? +require 'rspec/rails' +# Add additional requires below this line. Rails is not loaded until this point! + +# Requires supporting ruby files with custom matchers and macros, etc, in +# spec/support/ and its subdirectories. Files matching `spec/**/*_spec.rb` are +# run as spec files by default. This means that files in spec/support that end +# in _spec.rb will both be required and run as specs, causing the specs to be +# run twice. It is recommended that you do not name files matching this glob to +# end with _spec.rb. You can configure this pattern with the --pattern +# option on the command line or in ~/.rspec, .rspec or `.rspec-local`. +# +# The following line is provided for convenience purposes. It has the downside +# of increasing the boot-up time by auto-requiring all files in the support +# directory. Alternatively, in the individual `*_spec.rb` files, manually +# require only the support files necessary. +# +# Rails.root.glob('spec/support/**/*.rb').sort_by(&:to_s).each { |f| require f } + +# Checks for pending migrations and applies them before tests are run. +# If you are not using ActiveRecord, you can remove these lines. +begin + ActiveRecord::Migration.maintain_test_schema! +rescue ActiveRecord::PendingMigrationError => e + abort e.to_s.strip +end +RSpec.configure do |config| + # Remove this line if you're not using ActiveRecord or ActiveRecord fixtures + config.fixture_paths = [ + Rails.root.join('spec/fixtures') + ] + + # If you're not using ActiveRecord, or you'd prefer not to run each of your + # examples within a transaction, remove the following line or assign false + # instead of true. + config.use_transactional_fixtures = true + + # You can uncomment this line to turn off ActiveRecord support entirely. 
+ # config.use_active_record = false + + # RSpec Rails can automatically mix in different behaviours to your tests + # based on their file location, for example enabling you to call `get` and + # `post` in specs under `spec/controllers`. + # + # You can disable this behaviour by removing the line below, and instead + # explicitly tag your specs with their type, e.g.: + # + # RSpec.describe UsersController, type: :controller do + # # ... + # end + # + # The different available types are documented in the features, such as in + # https://rspec.info/features/7-0/rspec-rails + config.infer_spec_type_from_file_location! + + # Filter lines from Rails gems in backtraces. + config.filter_rails_from_backtrace! + # arbitrary gems may also be filtered via: + # config.filter_gems_from_backtrace("gem name") +end diff --git a/spec/services/reloader_spec.rb b/spec/services/reloader_spec.rb new file mode 100644 index 00000000..505b793d --- /dev/null +++ b/spec/services/reloader_spec.rb @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe Reloader do + describe '.call' do + subject(:reload_data) { described_class.call(json_file_path) } + + let(:json_file_path) { 'fixtures/example.json' } + + it 'clears existing data' do + City.create!(name: 'DummyCity') + reload_data + + expect(City.find_by(name: 'DummyCity')).to be_nil + end + + it 'loads trips data from JSON and creates records in DB' do + reload_data + + expect(City.count).to eq(2) + expect(Bus.count).to eq(1) + expect(Service.count).to eq(2) + expect(Trip.count).to eq(10) + + moscow = City.find_by(name: 'Москва') + expect(moscow).to be_present + + bus = Bus.find_by(number: '123') + expect(bus).to be_present + expect(bus.model).to eq('Икарус') + expect(bus.services.map(&:name).uniq.size).to eq(bus.services.size) + end + end +end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb new file mode 100644 index 00000000..327b58ea --- /dev/null +++ b/spec/spec_helper.rb @@ -0,0 +1,94 @@ +# 
This file was generated by the `rails generate rspec:install` command. Conventionally, all +# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`. +# The generated `.rspec` file contains `--require spec_helper` which will cause +# this file to always be loaded, without a need to explicitly require it in any +# files. +# +# Given that it is always loaded, you are encouraged to keep this file as +# light-weight as possible. Requiring heavyweight dependencies from this file +# will add to the boot time of your test suite on EVERY test run, even for an +# individual file that may not need all of that loaded. Instead, consider making +# a separate helper file that requires the additional dependencies and performs +# the additional setup, and require it from the spec files that actually need +# it. +# +# See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration +RSpec.configure do |config| + # rspec-expectations config goes here. You can use an alternate + # assertion/expectation library such as wrong or the stdlib/minitest + # assertions if you prefer. + config.expect_with :rspec do |expectations| + # This option will default to `true` in RSpec 4. It makes the `description` + # and `failure_message` of custom matchers include text for helper methods + # defined using `chain`, e.g.: + # be_bigger_than(2).and_smaller_than(4).description + # # => "be bigger than 2 and smaller than 4" + # ...rather than: + # # => "be bigger than 2" + expectations.include_chain_clauses_in_custom_matcher_descriptions = true + end + + # rspec-mocks config goes here. You can use an alternate test double + # library (such as bogus or mocha) by changing the `mock_with` option here. + config.mock_with :rspec do |mocks| + # Prevents you from mocking or stubbing a method that does not exist on + # a real object. This is generally recommended, and will default to + # `true` in RSpec 4. 
+ mocks.verify_partial_doubles = true + end + + # This option will default to `:apply_to_host_groups` in RSpec 4 (and will + # have no way to turn it off -- the option exists only for backwards + # compatibility in RSpec 3). It causes shared context metadata to be + # inherited by the metadata hash of host groups and examples, rather than + # triggering implicit auto-inclusion in groups with matching metadata. + config.shared_context_metadata_behavior = :apply_to_host_groups + +# The settings below are suggested to provide a good initial experience +# with RSpec, but feel free to customize to your heart's content. +=begin + # This allows you to limit a spec run to individual examples or groups + # you care about by tagging them with `:focus` metadata. When nothing + # is tagged with `:focus`, all examples get run. RSpec also provides + # aliases for `it`, `describe`, and `context` that include `:focus` + # metadata: `fit`, `fdescribe` and `fcontext`, respectively. + config.filter_run_when_matching :focus + + # Allows RSpec to persist some state between runs in order to support + # the `--only-failures` and `--next-failure` CLI options. We recommend + # you configure your source control system to ignore this file. + config.example_status_persistence_file_path = "spec/examples.txt" + + # Limits the available syntax to the non-monkey patched syntax that is + # recommended. For more details, see: + # https://rspec.info/features/3-12/rspec-core/configuration/zero-monkey-patching-mode/ + config.disable_monkey_patching! + + # Many RSpec users commonly either run the entire suite or an individual + # file, and it's useful to allow more verbose output when running an + # individual spec file. + if config.files_to_run.one? + # Use the documentation formatter for detailed output, + # unless a formatter has already been configured + # (e.g. via a command-line flag). 
+ config.default_formatter = "doc" + end + + # Print the 10 slowest examples and example groups at the + # end of the spec run, to help surface which specs are running + # particularly slow. + config.profile_examples = 10 + + # Run specs in random order to surface order dependencies. If you find an + # order dependency and want to debug it, you can fix the order by providing + # the seed, which is printed after each run. + # --seed 1234 + config.order = :random + + # Seed global randomization in this process using the `--seed` CLI option. + # Setting this allows you to use `--seed` to deterministically reproduce + # test failures related to randomization by passing the same `--seed` value + # as the one that triggered the failure. + Kernel.srand config.seed +=end +end diff --git a/test/application_system_test_case.rb b/test/application_system_test_case.rb deleted file mode 100644 index d19212ab..00000000 --- a/test/application_system_test_case.rb +++ /dev/null @@ -1,5 +0,0 @@ -require "test_helper" - -class ApplicationSystemTestCase < ActionDispatch::SystemTestCase - driven_by :selenium, using: :chrome, screen_size: [1400, 1400] -end diff --git a/test/controllers/.keep b/test/controllers/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/test/fixtures/.keep b/test/fixtures/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/test/fixtures/files/.keep b/test/fixtures/files/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/test/helpers/.keep b/test/helpers/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/test/integration/.keep b/test/integration/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/test/mailers/.keep b/test/mailers/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/test/models/.keep b/test/models/.keep deleted file mode 100644 index e69de29b..00000000 diff --git a/test/system/.keep b/test/system/.keep deleted file mode 100644 index 
e69de29b..00000000 diff --git a/test/test_helper.rb b/test/test_helper.rb deleted file mode 100644 index 3ab84e3d..00000000 --- a/test/test_helper.rb +++ /dev/null @@ -1,10 +0,0 @@ -ENV['RAILS_ENV'] ||= 'test' -require_relative '../config/environment' -require 'rails/test_help' - -class ActiveSupport::TestCase - # Setup all fixtures in test/fixtures/*.yml for all tests in alphabetical order. - fixtures :all - - # Add more helper methods to be used by all tests here... -end From 0b041c1374e01db79eb8f60ad852d37534ef9efa Mon Sep 17 00:00:00 2001 From: Mair Date: Sun, 2 Mar 2025 17:06:40 +0400 Subject: [PATCH 3/4] optimization --- .gitignore | 1 - case-study.md | 22 +++++++++++++++++++++- lib/profilers/profile.rb | 18 ++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) create mode 100644 lib/profilers/profile.rb diff --git a/.gitignore b/.gitignore index 16d618bc..768b01c7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,4 @@ /tmp /log /public -/profile_reports /.idea diff --git a/case-study.md b/case-study.md index 28eee554..e34c859d 100644 --- a/case-study.md +++ b/case-study.md @@ -18,6 +18,8 @@ CPU usage Количество SQL запросов Время выполнения SQL запросов +Для контроля корректности работы скрипта после внесения изменений, я сначала создал тест (spec/services/reloader_spec.rb) + ## Гарантия корректности Для обеспечения корректности оптимизации: @@ -30,7 +32,7 @@ CPU usage Для импорта: Подготовлены тестовые JSON файлы разного размера (small.json, medium.jsom, large.json) -pg_hero +Добавил lib/profilers/profile.rb (запуск через rails runner lib/profilers/profile.rb), который позволил мне оперативно оценивать изменение метрики. 
Для отображения: rack-mini-profiler для профилирования рендеринга @@ -56,3 +58,21 @@ pghero для анализа производительности PostgreSQL Время рендеринга страницы Самара-Москва: Y мс Количество SQL запросов: уменьшено с Z до W Этот case-study будет дополняться конкретными цифрами и деталями реализации по мере выполнения оптимизации. + +### №1 Замена find_or_create_by на batch insert +- ruby-prof +- find_or_create_by создает отдельную транзакцию для каждого города. +- Использование Set для уникальных значений и batch insert существенно снизило количество запросов к БД + +### №2 Замена find_or_create_by на batch insert +- ruby-prof +- Каждый update создавал новую транзакцию и требовал загрузки связанных объектов +- Прямой SQL insert значительно эффективнее для массовой вставки + +### №3 Предварительный сбор и подготовка данных +- memory_profiler показал высокое потребление памяти при создании объектов внутри цикла. +- Разделение процесса на сбор данных и их сохранение позволило эффективнее использовать память. + +## Итого: +Время выполнения для small.json уменьшилось с 27 секунд до 0.3 секунд. +Время выполнения для large.json составила 12 секунд, что укладывается в бюджет. 
diff --git a/lib/profilers/profile.rb b/lib/profilers/profile.rb new file mode 100644 index 00000000..e3898b00 --- /dev/null +++ b/lib/profilers/profile.rb @@ -0,0 +1,18 @@ +# frozen_string_literal: true + +require 'ruby-prof' +require 'fileutils' +require_relative '../../app/services/reloader' + +REPORTS_DIR = 'tmp/profile_reports/ruby_prof_reports' + +FileUtils.rm_rf(REPORTS_DIR) +FileUtils.mkdir_p(REPORTS_DIR) + +profile = RubyProf::Profile.new(measure_mode: RubyProf::WALL_TIME) +result = profile.profile { Reloader.call("fixtures/large.json") } + +RubyProf::FlatPrinter.new(result).print(File.open("#{REPORTS_DIR}/flat.txt", "w+")) +RubyProf::GraphHtmlPrinter.new(result).print(File.open("#{REPORTS_DIR}/graph.html", "w+")) +RubyProf::CallStackPrinter.new(result).print(File.open("#{REPORTS_DIR}/callstack.html", 'w+')) +RubyProf::CallTreePrinter.new(result).print(path: REPORTS_DIR, profile: 'callgrind') From a89803e248236df3d402f5e2d34f4f2a80fa8fbd Mon Sep 17 00:00:00 2001 From: Mair Date: Sun, 2 Mar 2025 18:12:55 +0400 Subject: [PATCH 4/4] optimization 2 --- app/controllers/trips_controller.rb | 2 +- app/views/trips/_trip.html.erb | 16 +++++++--- app/views/trips/index.html.erb | 12 ++----- case-study.md | 31 +++++++++++++++++-- ...135224_add_index_to_trips_from_id_to_id.rb | 11 +++++++ ...50302140320_add_index_to_buses_services.rb | 11 +++++++ db/schema.rb | 4 ++- 7 files changed, 69 insertions(+), 18 deletions(-) create mode 100644 db/migrate/20250302135224_add_index_to_trips_from_id_to_id.rb create mode 100644 db/migrate/20250302140320_add_index_to_buses_services.rb diff --git a/app/controllers/trips_controller.rb b/app/controllers/trips_controller.rb index acb38be2..66346266 100644 --- a/app/controllers/trips_controller.rb +++ b/app/controllers/trips_controller.rb @@ -2,6 +2,6 @@ class TripsController < ApplicationController def index @from = City.find_by_name!(params[:from]) @to = City.find_by_name!(params[:to]) - @trips = Trip.where(from: @from, to: 
@to).order(:start_time) + @trips = Trip.preload(:from, :to, bus: :services).where(from: @from, to: @to) end end diff --git a/app/views/trips/_trip.html.erb b/app/views/trips/_trip.html.erb index fa1de9aa..4faca8cc 100644 --- a/app/views/trips/_trip.html.erb +++ b/app/views/trips/_trip.html.erb @@ -1,5 +1,11 @@ -
  • <%= "Отправление: #{trip.start_time}" %>
  • -
  • <%= "Прибытие: #{(Time.parse(trip.start_time) + trip.duration_minutes.minutes).strftime('%H:%M')}" %>
  • -
  • <%= "В пути: #{trip.duration_minutes / 60}ч. #{trip.duration_minutes % 60}мин." %>
  • -
  • <%= "Цена: #{trip.price_cents / 100}р. #{trip.price_cents % 100}коп." %>
  • -
  • <%= "Автобус: #{trip.bus.model} №#{trip.bus.number}" %>
  • +
      +
    • <%= "Отправление: #{trip.start_time}" %>
    • +
    • <%= "Прибытие: #{(Time.parse(trip.start_time) + trip.duration_minutes.minutes).strftime('%H:%M')}" %>
    • +
    • <%= "В пути: #{trip.duration_minutes / 60}ч. #{trip.duration_minutes % 60}мин." %>
    • +
    • <%= "Цена: #{trip.price_cents / 100}р. #{trip.price_cents % 100}коп." %>
    • +
    • <%= "Автобус: #{trip.bus.model} №#{trip.bus.number}" %>
    • + + <% if trip.bus.services.present? %> + <%= render "services", services: trip.bus.services %> + <% end %> +
    diff --git a/app/views/trips/index.html.erb b/app/views/trips/index.html.erb index a60bce41..51510692 100644 --- a/app/views/trips/index.html.erb +++ b/app/views/trips/index.html.erb @@ -5,12 +5,6 @@ <%= "В расписании #{@trips.count} рейсов" %> -<% @trips.each do |trip| %> -
      - <%= render "trip", trip: trip %> - <% if trip.bus.services.present? %> - <%= render "services", services: trip.bus.services %> - <% end %> -
    - <%= render "delimiter" %> -<% end %> +
    + <%= render partial: "trip", collection: @trips, spacer_template: "delimiter" %> +
    diff --git a/case-study.md b/case-study.md index e34c859d..0c4a0c0c 100644 --- a/case-study.md +++ b/case-study.md @@ -31,7 +31,7 @@ CPU usage Построен быстрый цикл обратной связи: Для импорта: -Подготовлены тестовые JSON файлы разного размера (small.json, medium.jsom, large.json) +Подготовлены тестовые JSON файлы разного размера (small.json, medium.json, large.json) Добавил lib/profilers/profile.rb (запуск через rails runner lib/profilers/profile.rb), который позволил мне оперативно оценивать изменение метрики. Для отображения: @@ -48,7 +48,7 @@ stackprof для профилирования CPU Для отображения: rack-mini-profiler -bullet для выявления N+1 запросов +bullet для выявления N+1 запросов (в итоге не стал им пользоваться, инструмента rack-mini-profiler хватило с головой) pghero для анализа производительности PostgreSQL ## Результаты оптимизации @@ -76,3 +76,30 @@ pghero для анализа производительности PostgreSQL ## Итого: Время выполнения для small.json уменьшилось с 27 секунд до 0.3 секунд. Время выполнения для large.json составила 12 секунд, что укладывается в бюджет. + +### №4 Устранение N+1 запросов +- rack-mini-profiler +- Каждая итерация по @trips.each порождала дополнительные запросы к связанным таблицам +- Метрика изменилась с 12 секунд и 1768 sql запросов на странице "/автобусы/Москва/Самара" на файле large.json до 2.3 секунд и 7 sql запросов + +### №5 Медленный запрос +``` + Trip Count (21.2ms) SELECT COUNT(*) FROM "trips" WHERE "trips"."from_id" = $1 AND "trips"."to_id" = $2 [["from_id", 252], ["to_id", 254]] +``` +- Логи rails и rack-mini-profiler +- Проверил как выполняется запрос в pg_hero, используется Seq Scan, Pg_hero предложил добавить индекс CREATE INDEX CONCURRENTLY ON trips (from_id, to_id) +- Скорость выполнения запроса сократилась с 21ms до 1.5ms + +### №6 Медленный запрос +``` + SELECT "buses_services".* FROM "buses_services" WHERE "buses_services"."bus_id" IN (...) 
+``` +- rack-mini-profiler +- Проверил как выполняется запрос в pg_hero, используется Seq Scan, Pg_hero предложил добавить индекс CREATE INDEX CONCURRENTLY ON buses_services (bus_id) +- Скорость выполнения запроса сократилась с 28ms до 20ms + +### №7 Множественный рендеринг partial +- В логах rack-mini-profiler видно множественный рендеринг одного и того же partial _services.html.erb +- Текущая реализация в index.html.erb использует цикл с отдельным рендерингом для каждой поездки +- Заменили на использование collection rendering +- Время рендеринга страницы сократилось с 2с до 1с diff --git a/db/migrate/20250302135224_add_index_to_trips_from_id_to_id.rb b/db/migrate/20250302135224_add_index_to_trips_from_id_to_id.rb new file mode 100644 index 00000000..3081fc67 --- /dev/null +++ b/db/migrate/20250302135224_add_index_to_trips_from_id_to_id.rb @@ -0,0 +1,11 @@ +class AddIndexToTripsFromIdToId < ActiveRecord::Migration[8.0] + disable_ddl_transaction! + + def up + add_index :trips, [:from_id, :to_id], algorithm: :concurrently + end + + def down + remove_index :trips, [:from_id, :to_id], algorithm: :concurrently + end +end diff --git a/db/migrate/20250302140320_add_index_to_buses_services.rb b/db/migrate/20250302140320_add_index_to_buses_services.rb new file mode 100644 index 00000000..59c4cd98 --- /dev/null +++ b/db/migrate/20250302140320_add_index_to_buses_services.rb @@ -0,0 +1,11 @@ +class AddIndexToBusesServices < ActiveRecord::Migration[8.0] + disable_ddl_transaction! + + def up + add_index :buses_services, :bus_id, algorithm: :concurrently + end + + def down + remove_index :buses_services, :bus_id, algorithm: :concurrently + end +end diff --git a/db/schema.rb b/db/schema.rb index 9229b14f..c155af24 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema[8.0].define(version: 2025_02_27_171011) do +ActiveRecord::Schema[8.0].define(version: 2025_03_02_140320) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" enable_extension "pg_stat_statements" @@ -23,6 +23,7 @@ create_table "buses_services", force: :cascade do |t| t.integer "bus_id" t.integer "service_id" + t.index ["bus_id"], name: "index_buses_services_on_bus_id" end create_table "cities", force: :cascade do |t| @@ -60,5 +61,6 @@ t.integer "duration_minutes" t.integer "price_cents" t.integer "bus_id" + t.index ["from_id", "to_id"], name: "index_trips_on_from_id_and_to_id" end end