Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 0a38c7e

Browse files
author
David Robertson
authored Sep 26, 2022
Snapshot schema 72 (#13873)
Including another batch of fixes to the schema dump script
1 parent 41461fd commit 0a38c7e

File tree

13 files changed

+2165
-22
lines changed

13 files changed

+2165
-22
lines changed
 

‎changelog.d/13873.misc

+1
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
Create a new snapshot of the database schema.

‎scripts-dev/make_full_schema.sh

+46-14
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,9 @@ usage() {
2626
echo " Defaults to 9999."
2727
echo "-h"
2828
echo " Display this help text."
29+
echo ""
30+
echo " NB: make sure to run this against the *oldest* supported version of postgres,"
31+
echo " or else pg_dump might output non-backwards-compatible syntax."
2932
}
3033

3134
SCHEMA_NUMBER="9999"
@@ -240,25 +243,54 @@ DROP TABLE user_directory_search_stat;
240243

241244
echo "Dumping SQLite3 schema..."
242245

243-
mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schema/$SCHEMA_NUMBER"
244-
sqlite3 "$SQLITE_COMMON_DB" ".schema --indent" > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
245-
sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
246-
sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
247-
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
248-
sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
249-
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.sqlite"
246+
mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER"
247+
sqlite3 "$SQLITE_COMMON_DB" ".schema" > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
248+
sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
249+
sqlite3 "$SQLITE_MAIN_DB" ".schema" > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
250+
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
251+
sqlite3 "$SQLITE_STATE_DB" ".schema" > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
252+
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite"
250253

251254
cleanup_pg_schema() {
252-
sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d'
255+
# Cleanup as follows:
256+
# - Remove empty lines. pg_dump likes to output a lot of these.
257+
# - Remove comment-only lines. pg_dump also likes to output a lot of these to visually
258+
# separate tables etc.
259+
# - Remove "public." prefix --- the schema name.
260+
# - Remove "SET" commands. Last time I ran this, the output commands were
261+
# SET statement_timeout = 0;
262+
# SET lock_timeout = 0;
263+
# SET idle_in_transaction_session_timeout = 0;
264+
# SET client_encoding = 'UTF8';
265+
# SET standard_conforming_strings = on;
266+
# SET check_function_bodies = false;
267+
# SET xmloption = content;
268+
# SET client_min_messages = warning;
269+
# SET row_security = off;
270+
# SET default_table_access_method = heap;
271+
# - Very carefully remove specific SELECT statements. We CANNOT blanket remove all
272+
# SELECT statements because some of those have side-effects which we do want in the
273+
# schema. Last time I ran this, the only SELECTS were
274+
# SELECT pg_catalog.set_config('search_path', '', false);
275+
# and
276+
# SELECT pg_catalog.setval(text, bigint, bool);
277+
# We do want to remove the former, but the latter is important. If the last argument
278+
# is `true` or omitted, the given integer is marked as already consumed, so it
279+
# will NOT be returned by the next call to nextval.
280+
sed -e '/^$/d' \
281+
-e '/^--/d' \
282+
-e 's/public\.//g' \
283+
-e '/^SET /d' \
284+
-e '/^SELECT pg_catalog.set_config/d'
253285
}
254286

255287
echo "Dumping Postgres schema..."
256288

257-
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
258-
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
259-
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
260-
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
261-
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
262-
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schema/$SCHEMA_NUMBER/full.sql.postgres"
289+
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
290+
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
291+
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
292+
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
293+
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
294+
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres"
263295

264296
echo "Done! Files dumped to: $OUTPUT_DIR"

‎synapse/storage/database.py

+8
Original file line number | Diff line number | Diff line change
@@ -393,6 +393,14 @@ def execute(self, sql: str, *args: Any) -> None:
393393
def executemany(self, sql: str, *args: Any) -> None:
394394
self._do_execute(self.txn.executemany, sql, *args)
395395

396+
def executescript(self, sql: str) -> None:
397+
if isinstance(self.database_engine, Sqlite3Engine):
398+
self._do_execute(self.txn.executescript, sql) # type: ignore[attr-defined]
399+
else:
400+
raise NotImplementedError(
401+
f"executescript only exists for sqlite driver, not {type(self.database_engine)}"
402+
)
403+
396404
def _make_sql_one_line(self, sql: str) -> str:
397405
"Strip newlines out of SQL so that the loggers in the DB are on one line"
398406
return " ".join(line.strip() for line in sql.splitlines() if line.strip())

‎synapse/storage/engines/_base.py

+21-2
Original file line number | Diff line number | Diff line change
@@ -32,9 +32,10 @@ class IncorrectDatabaseSetup(RuntimeError):
3232

3333

3434
ConnectionType = TypeVar("ConnectionType", bound=Connection)
35+
CursorType = TypeVar("CursorType", bound=Cursor)
3536

3637

37-
class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta):
38+
class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCMeta):
3839
def __init__(self, module: DBAPI2Module, config: Mapping[str, Any]):
3940
self.module = module
4041

@@ -64,7 +65,7 @@ def check_database(
6465
...
6566

6667
@abc.abstractmethod
67-
def check_new_database(self, txn: Cursor) -> None:
68+
def check_new_database(self, txn: CursorType) -> None:
6869
"""Gets called when setting up a brand new database. This allows us to
6970
apply stricter checks on new databases versus existing database.
7071
"""
@@ -124,3 +125,21 @@ def attempt_to_set_isolation_level(
124125
Note: This has no effect on SQLite3, as transactions are SERIALIZABLE by default.
125126
"""
126127
...
128+
129+
@staticmethod
130+
@abc.abstractmethod
131+
def executescript(cursor: CursorType, script: str) -> None:
132+
"""Execute a chunk of SQL containing multiple semicolon-delimited statements.
133+
134+
This is not provided by DBAPI2, and so needs engine-specific support.
135+
"""
136+
...
137+
138+
@classmethod
139+
def execute_script_file(cls, cursor: CursorType, filepath: str) -> None:
140+
"""Execute a file containing multiple semicolon-delimited SQL statements.
141+
142+
This is not provided by DBAPI2, and so needs engine-specific support.
143+
"""
144+
with open(filepath, "rt") as f:
145+
cls.executescript(cursor, f.read())

‎synapse/storage/engines/postgres.py

+11-1
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,9 @@
3131
logger = logging.getLogger(__name__)
3232

3333

34-
class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]):
34+
class PostgresEngine(
35+
BaseDatabaseEngine[psycopg2.extensions.connection, psycopg2.extensions.cursor]
36+
):
3537
def __init__(self, database_config: Mapping[str, Any]):
3638
super().__init__(psycopg2, database_config)
3739
psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
@@ -212,3 +214,11 @@ def attempt_to_set_isolation_level(
212214
else:
213215
isolation_level = self.isolation_level_map[isolation_level]
214216
return conn.set_isolation_level(isolation_level)
217+
218+
@staticmethod
219+
def executescript(cursor: psycopg2.extensions.cursor, script: str) -> None:
220+
"""Execute a chunk of SQL containing multiple semicolon-delimited statements.
221+
222+
Psycopg2 seems happy to do this in DBAPI2's `execute()` function.
223+
"""
224+
cursor.execute(script)

‎synapse/storage/engines/sqlite.py

+20-1
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
from synapse.storage.database import LoggingDatabaseConnection
2525

2626

27-
class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]):
27+
class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]):
2828
def __init__(self, database_config: Mapping[str, Any]):
2929
super().__init__(sqlite3, database_config)
3030

@@ -120,6 +120,25 @@ def attempt_to_set_isolation_level(
120120
# All transactions are SERIALIZABLE by default in sqlite
121121
pass
122122

123+
@staticmethod
124+
def executescript(cursor: sqlite3.Cursor, script: str) -> None:
125+
"""Execute a chunk of SQL containing multiple semicolon-delimited statements.
126+
127+
Python's built-in SQLite driver does not allow you to do this with DBAPI2's
128+
`execute`:
129+
130+
> execute() will only execute a single SQL statement. If you try to execute more
131+
> than one statement with it, it will raise a Warning. Use executescript() if
132+
> you want to execute multiple SQL statements with one call.
133+
134+
Though the docs for `executescript` warn:
135+
136+
> If there is a pending transaction, an implicit COMMIT statement is executed
137+
> first. No other implicit transaction control is performed; any transaction
138+
> control must be added to sql_script.
139+
"""
140+
cursor.executescript(script)
141+
123142

124143
# Following functions taken from: https://github.com/coleifer/peewee
125144

‎synapse/storage/prepare_database.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,7 @@ def _setup_new_database(
266266
".sql." + specific
267267
):
268268
logger.debug("Applying schema %s", entry.absolute_path)
269-
executescript(cur, entry.absolute_path)
269+
database_engine.execute_script_file(cur, entry.absolute_path)
270270

271271
cur.execute(
272272
"INSERT INTO schema_version (version, upgraded) VALUES (?,?)",
@@ -517,15 +517,15 @@ def _upgrade_existing_database(
517517
UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
518518
)
519519
logger.info("Applying schema %s", relative_path)
520-
executescript(cur, absolute_path)
520+
database_engine.execute_script_file(cur, absolute_path)
521521
elif ext == specific_engine_extension and root_name.endswith(".sql"):
522522
# A .sql file specific to our engine; just read and execute it
523523
if is_worker:
524524
raise PrepareDatabaseException(
525525
UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path
526526
)
527527
logger.info("Applying engine-specific schema %s", relative_path)
528-
executescript(cur, absolute_path)
528+
database_engine.execute_script_file(cur, absolute_path)
529529
elif ext in specific_engine_extensions and root_name.endswith(".sql"):
530530
# A .sql file for a different engine; skip it.
531531
continue
@@ -666,7 +666,7 @@ def _get_or_create_schema_state(
666666
) -> Optional[_SchemaState]:
667667
# Bluntly try creating the schema_version tables.
668668
sql_path = os.path.join(schema_path, "common", "schema_version.sql")
669-
executescript(txn, sql_path)
669+
database_engine.execute_script_file(txn, sql_path)
670670

671671
txn.execute("SELECT version, upgraded FROM schema_version")
672672
row = txn.fetchone()
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
1+
CREATE TABLE background_updates (
2+
update_name text NOT NULL,
3+
progress_json text NOT NULL,
4+
depends_on text,
5+
ordering integer DEFAULT 0 NOT NULL
6+
);
7+
ALTER TABLE ONLY background_updates
8+
ADD CONSTRAINT background_updates_uniqueness UNIQUE (update_name);
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
CREATE TABLE background_updates (
2+
update_name text NOT NULL,
3+
progress_json text NOT NULL,
4+
depends_on text, ordering INT NOT NULL DEFAULT 0,
5+
CONSTRAINT background_updates_uniqueness UNIQUE (update_name)
6+
);

‎synapse/storage/schema/main/full_schemas/72/full.sql.postgres

+1,344
Large diffs are not rendered by default.

‎synapse/storage/schema/main/full_schemas/72/full.sql.sqlite

+646
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
CREATE TABLE state_group_edges (
2+
state_group bigint NOT NULL,
3+
prev_state_group bigint NOT NULL
4+
);
5+
CREATE SEQUENCE state_group_id_seq
6+
START WITH 1
7+
INCREMENT BY 1
8+
NO MINVALUE
9+
NO MAXVALUE
10+
CACHE 1;
11+
CREATE TABLE state_groups (
12+
id bigint NOT NULL,
13+
room_id text NOT NULL,
14+
event_id text NOT NULL
15+
);
16+
CREATE TABLE state_groups_state (
17+
state_group bigint NOT NULL,
18+
room_id text NOT NULL,
19+
type text NOT NULL,
20+
state_key text NOT NULL,
21+
event_id text NOT NULL
22+
);
23+
ALTER TABLE ONLY state_groups_state ALTER COLUMN state_group SET (n_distinct=-0.02);
24+
ALTER TABLE ONLY state_groups
25+
ADD CONSTRAINT state_groups_pkey PRIMARY KEY (id);
26+
CREATE INDEX state_group_edges_prev_idx ON state_group_edges USING btree (prev_state_group);
27+
CREATE UNIQUE INDEX state_group_edges_unique_idx ON state_group_edges USING btree (state_group, prev_state_group);
28+
CREATE INDEX state_groups_room_id_idx ON state_groups USING btree (room_id);
29+
CREATE INDEX state_groups_state_type_idx ON state_groups_state USING btree (state_group, type, state_key);
30+
SELECT pg_catalog.setval('state_group_id_seq', 1, false);
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
CREATE TABLE state_groups (
2+
id BIGINT PRIMARY KEY,
3+
room_id TEXT NOT NULL,
4+
event_id TEXT NOT NULL
5+
);
6+
CREATE TABLE state_groups_state (
7+
state_group BIGINT NOT NULL,
8+
room_id TEXT NOT NULL,
9+
type TEXT NOT NULL,
10+
state_key TEXT NOT NULL,
11+
event_id TEXT NOT NULL
12+
);
13+
CREATE TABLE state_group_edges (
14+
state_group BIGINT NOT NULL,
15+
prev_state_group BIGINT NOT NULL
16+
);
17+
CREATE INDEX state_group_edges_prev_idx ON state_group_edges (prev_state_group);
18+
CREATE INDEX state_groups_state_type_idx ON state_groups_state (state_group, type, state_key);
19+
CREATE INDEX state_groups_room_id_idx ON state_groups (room_id) ;
20+
CREATE UNIQUE INDEX state_group_edges_unique_idx ON state_group_edges (state_group, prev_state_group) ;

0 commit comments

Comments
 (0)
This repository has been archived.