Retrieving structure files from PDB
The schema of a local CAZyme database can be retrieved using cazy_webscraper
:
Alternatively, sqlite3 can be used to retrieve the schema:
A visual representation of the db schema when using cazy_webscraper version >= 2.3.0 can be found here.
As of cazy_webscraper version >= 2.3.0, the schema of a local CAZyme database will be:
CREATE TABLE IF NOT EXISTS "Kingdoms" (
kingdom_id INTEGER NOT NULL,
kingdom VARCHAR,
PRIMARY KEY (kingdom_id),
UNIQUE (kingdom)
);
CREATE TABLE IF NOT EXISTS "GtdbTaxs" (
gtdb_tax_id INTEGER NOT NULL,
kingdom VARCHAR,
phylum VARCHAR,
tax_class VARCHAR,
tax_order VARCHAR,
family VARCHAR,
genus VARCHAR,
species VARCHAR,
release VARCHAR,
PRIMARY KEY (gtdb_tax_id),
UNIQUE (kingdom, phylum, tax_class, tax_order, family, genus, species, release)
);
CREATE TABLE IF NOT EXISTS "CazyFamilies" (
family_id INTEGER NOT NULL,
family VARCHAR NOT NULL,
subfamily VARCHAR,
PRIMARY KEY (family_id),
UNIQUE (family, subfamily)
);
CREATE INDEX fam_index ON "CazyFamilies" (family, subfamily);
CREATE TABLE IF NOT EXISTS "NcbiTaxs" (
ncbi_tax_id INTEGER NOT NULL,
kingdom VARCHAR,
phylum VARCHAR,
tax_class VARCHAR,
tax_order VARCHAR,
family VARCHAR,
genus VARCHAR,
species VARCHAR,
strain VARCHAR,
PRIMARY KEY (ncbi_tax_id),
UNIQUE (ncbi_tax_id)
);
CREATE INDEX ncbi_index ON "NcbiTaxs" (ncbi_tax_id, genus, species);
CREATE TABLE IF NOT EXISTS "Uniprots" (
uniprot_id INTEGER NOT NULL,
uniprot_accession VARCHAR,
uniprot_name VARCHAR,
sequence VARCHAR,
seq_update_date VARCHAR,
PRIMARY KEY (uniprot_id),
UNIQUE (uniprot_accession)
);
CREATE INDEX uniprot_option ON "Uniprots" (uniprot_id, uniprot_accession);
CREATE TABLE IF NOT EXISTS "Ecs" (
ec_id INTEGER NOT NULL,
ec_number VARCHAR,
PRIMARY KEY (ec_id),
UNIQUE (ec_number)
);
CREATE INDEX "ix_Ecs_ec_number" ON "Ecs" (ec_number);
CREATE TABLE IF NOT EXISTS "Pdbs" (
pdb_id INTEGER NOT NULL,
pdb_accession VARCHAR,
PRIMARY KEY (pdb_id),
UNIQUE (pdb_accession)
);
CREATE INDEX pdb_idx ON "Pdbs" (pdb_accession);
CREATE TABLE IF NOT EXISTS "Logs" (
log_id INTEGER NOT NULL,
date VARCHAR,
time VARCHAR,
"database" VARCHAR,
retrieved_annotations VARCHAR,
classes VARCHAR,
families VARCHAR,
kingdoms VARCHAR,
genera_filter VARCHAR,
species_filter VARCHAR,
strains_filter VARCHAR,
ec_filter VARCHAR,
cmd_line VARCHAR,
PRIMARY KEY (log_id)
);
CREATE TABLE IF NOT EXISTS "Taxs" (
taxonomy_id INTEGER NOT NULL,
genus VARCHAR,
species VARCHAR,
kingdom_id INTEGER,
PRIMARY KEY (taxonomy_id),
UNIQUE (genus, species),
FOREIGN KEY(kingdom_id) REFERENCES "Kingdoms" (kingdom_id)
);
CREATE INDEX organism_option ON "Taxs" (taxonomy_id, genus, species);
CREATE TABLE IF NOT EXISTS "Genomes" (
genome_id INTEGER NOT NULL,
assembly_name VARCHAR,
gbk_version_accession VARCHAR,
gbk_ncbi_id INTEGER,
refseq_version_accession VARCHAR,
refseq_ncbi_id INTEGER,
gtdb_tax_id INTEGER,
PRIMARY KEY (genome_id),
UNIQUE (assembly_name, gbk_version_accession, refseq_version_accession),
FOREIGN KEY(gtdb_tax_id) REFERENCES "GtdbTaxs" (gtdb_tax_id)
);
CREATE INDEX genome_options ON "Genomes" (assembly_name, gbk_version_accession, refseq_version_accession);
CREATE TABLE IF NOT EXISTS "Genbanks" (
genbank_id INTEGER NOT NULL,
genbank_accession VARCHAR,
sequence VARCHAR,
seq_update_date VARCHAR,
taxonomy_id INTEGER,
ncbi_tax_id INTEGER,
uniprot_id INTEGER,
PRIMARY KEY (genbank_id),
UNIQUE (genbank_accession),
FOREIGN KEY(taxonomy_id) REFERENCES "Taxs" (taxonomy_id),
FOREIGN KEY(ncbi_tax_id) REFERENCES "NcbiTaxs" (ncbi_tax_id),
FOREIGN KEY(uniprot_id) REFERENCES "Uniprots" (uniprot_id)
);
CREATE INDEX "ix_Genbanks_genbank_accession" ON "Genbanks" (genbank_accession);
CREATE TABLE IF NOT EXISTS "Genbanks_Genomes" (
genbank_id INTEGER NOT NULL,
genome_id INTEGER NOT NULL,
PRIMARY KEY (genbank_id, genome_id),
FOREIGN KEY(genbank_id) REFERENCES "Genbanks" (genbank_id),
FOREIGN KEY(genome_id) REFERENCES "Genomes" (genome_id)
);
CREATE TABLE IF NOT EXISTS "Genbanks_CazyFamilies" (
genbank_id INTEGER NOT NULL,
family_id INTEGER NOT NULL,
PRIMARY KEY (genbank_id, family_id),
FOREIGN KEY(genbank_id) REFERENCES "Genbanks" (genbank_id),
FOREIGN KEY(family_id) REFERENCES "CazyFamilies" (family_id)
);
CREATE TABLE IF NOT EXISTS "Genbanks_Ecs" (
genbank_id INTEGER NOT NULL,
ec_id INTEGER NOT NULL,
PRIMARY KEY (genbank_id, ec_id),
FOREIGN KEY(genbank_id) REFERENCES "Genbanks" (genbank_id),
FOREIGN KEY(ec_id) REFERENCES "Ecs" (ec_id)
);
CREATE TABLE IF NOT EXISTS "Genbanks_Pdbs" (
genbank_id INTEGER NOT NULL,
pdb_id INTEGER NOT NULL,
PRIMARY KEY (genbank_id, pdb_id),
FOREIGN KEY(genbank_id) REFERENCES "Genbanks" (genbank_id),
FOREIGN KEY(pdb_id) REFERENCES "Pdbs" (pdb_id)
);