diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
new file mode 100644
index 000000000..a0702a3df
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
@@ -0,0 +1,97 @@
+ drop table if exists :SCHEMA.uniprotgenes;
+
+ CREATE UNLOGGED TABLE :SCHEMA.uniprotGenes AS
+ SELECT DISTINCT ed.name
+ , d.*
+ , edr.version
+ , aa.source_id
+ , pa.gene_source_id
+ , CASE WHEN (ed.name like '%SWISSPROT%' AND edr.version = 'xrefuniparc') THEN 1
+ WHEN (ed.name like '%SPTREMBL%' AND edr.version = 'xrefuniparc') THEN 2
+ WHEN (ed.name like '%SWISSPROT%' AND edr.version = 'xref_sprot_blastp') THEN 4
+ WHEN (ed.name like '%SPTREMBL%' and edr.version = 'xref_trembl_blastp') THEN 5
+ ELSE 6 END as rank
+ , (af.last_residue_index - af.first_residue_index + 1) as hit_length
+ FROM sres.dbref d
+ LEFT JOIN apidb.AlphaFold af ON d.primary_identifier = af.uniprot_id
+ , sres.externaldatabase ed
+ , sres.externaldatabaserelease edr
+ , dots.dbrefaafeature db
+ , dots.aafeature aa
+ , :SCHEMA.ProteinAttributes pa
+ WHERE (ed.name = 'Uniprot/SWISSPROT' OR ed.name = 'Uniprot/SPTREMBL')
+ AND (edr.version = 'xrefuniparc' OR edr.version = 'xref_sprot_blastp' OR edr.version = 'xref_trembl_blastp')
+ AND edr.external_database_id = ed.external_database_id
+ AND d.external_database_release_id = edr.external_database_release_id
+ AND db.db_ref_id = d.db_ref_id
+ AND aa.aa_feature_id = db.aa_feature_id
+ AND pa.source_id = aa.source_id
+ UNION
+ SELECT DISTINCT ed.name
+ , d.*
+ , edr.version
+ , na.source_id
+ , ta.gene_source_id
+ , 3 as rank
+ , (af.last_residue_index - af.first_residue_index + 1) as hit_length
+ FROM sres.dbref d
+ LEFT JOIN apidb.AlphaFold af ON d.primary_identifier = af.uniprot_id
+ , sres.externaldatabase ed
+ , sres.externaldatabaserelease edr
+ , dots.dbrefnafeature db
+ , dots.nafeature na
+ , :SCHEMA.TranscriptAttributes ta
+ WHERE ed.name like '%_dbxref_%niprot_%RSRC'
+ AND edr.external_database_id = ed.external_database_id
+ AND d.external_database_release_id = edr.external_database_release_id
+ AND db.db_ref_id = d.db_ref_id
+ AND na.na_feature_id = db.na_feature_id
+ AND (ta.transcript_source_id = na.source_id OR ta.gene_source_id = na.source_id)
+
+ ;
+
+ drop table if exists :SCHEMA.minrank
+;
+ CREATE UNLOGGED TABLE :SCHEMA.minRank AS (
+ SELECT gene_source_id
+ , MIN(rank) as min_rank
+ FROM :SCHEMA.uniprotGenes upg
+ WHERE hit_length is not null
+ GROUP BY gene_source_id
+ )
+
+ ;
+
+ drop table if exists :SCHEMA.alphafoldhits;
+
+ CREATE UNLOGGED TABLE :SCHEMA.alphaFoldHits AS (
+ SELECT DISTINCT gene_source_id
+ , last_value(primary_identifier) over (PARTITION BY gene_source_id ORDER BY hit_length ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS uniprot_id
+ FROM (
+ SELECT upg.*
+ FROM :SCHEMA.uniprotGenes upg
+ , :SCHEMA.minRank
+ WHERE upg.gene_source_id = minRank.gene_source_id
+ AND upg.rank = minRank.min_rank
+ ) t
+ )
+ ;
+
+ CREATE TABLE :SCHEMA.AlphaFoldGenes AS (
+ SELECT afh.gene_source_id
+ , af.uniprot_id
+ , af.source_id as alphafold_id
+ , af.alphafold_version
+ , af.first_residue_index
+ , af.last_residue_index
+ FROM apidb.alphafold af
+ , :SCHEMA.alphaFoldHits afh
+ WHERE afh.uniprot_id = af.uniprot_id
+ )
+
+ ;
+
+ drop table if exists :SCHEMA.uniprotgenes;
+ drop table if exists :SCHEMA.minrank;
+ drop table if exists :SCHEMA.alphafoldhits;
+
diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql
new file mode 100644
index 000000000..5a938f861
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql
@@ -0,0 +1,3 @@
+ CREATE index AlphaFoldGenes_idx ON :SCHEMA.AlphaFoldGenes (gene_source_id, uniprot_id)
+
+ ;
diff --git a/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql b/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql
new file mode 100644
index 000000000..919cb8a61
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql
@@ -0,0 +1,21 @@
+create table :SCHEMA.GroupDomainDescriptions as
+SELECT og.group_id AS group_name, ag.descriptions
+FROM apidb.OrthologGroup og,
+ (SELECT group_name,
+ STRING_AGG(accession ||' (' || num_proteins|| ')', ', ') AS descriptions
+ FROM (SELECT group_name, accession, num_proteins, rnk
+ FROM (SELECT group_name, accession, num_proteins,
+ rank() OVER (PARTITION BY group_name ORDER BY num_proteins DESC) rnk
+ FROM (SELECT group_name, accession, count(distinct full_id) AS num_proteins
+ FROM :SCHEMA.ProteinDomainAssignment
+ GROUP BY group_name,accession
+ )
+ )
+ WHERE rnk <= 3
+ )
+ GROUP BY group_name
+ ORDER BY 1
+ ) ag
+WHERE og.group_id = ag.group_name
+
+;
\ No newline at end of file
diff --git a/Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql b/Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql
new file mode 100644
index 000000000..27ed2e9f4
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql
@@ -0,0 +1,2 @@
+CREATE INDEX GroupDomainAttribute_idx ON :SCHEMA.GroupDomainDescriptions (group_name)
+;
\ No newline at end of file
diff --git a/Model/lib/psql/webready/comparative/LoadOrthologTables.psql b/Model/lib/psql/webready/comparative/LoadOrthologTables.psql
new file mode 100644
index 000000000..ece00e833
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/LoadOrthologTables.psql
@@ -0,0 +1,29 @@
+Truncate table :SCHEMA.GeneOrthologGroup;
+Truncate table :SCHEMA.TranscriptOrthologGroup;
+
+insert into :SCHEMA.GeneOrthologGroup (gene_id, group_id, project_id, org_abbrev, modification_date)
+SELECT ga.gene_source_id AS gene_id
+ , ogas.group_id
+ , ga.project_id
+ , ga.org_abbrev
+ , timestamp as modification_date
+ FROM :SCHEMA.geneattributes ga,
+ apidb.orthologgroupaasequence ogas
+ WHERE ga.aa_sequence_id = ogas.aa_sequence_id;
+
+
+
+insert into :SCHEMA.TranscriptOrthologGroup (source_id, gene_id, group_id, project_id, org_abbrev, modification_date)
+SELECT ta.source_id AS source_id
+ , ta.gene_source_id as gene_id
+ , ogas.group_id
+ , ta.project_id
+ , ta.org_abbrev
+ , timestamp as modification_date
+ FROM :SCHEMA.transcriptattributes ta
+ , apidb.orthologgroupaasequence ogas
+ WHERE ta.aa_sequence_id = ogas.aa_sequence_id;
+
+
+
+
diff --git a/Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql b/Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql
new file mode 100644
index 000000000..28ffcf3b4
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql
@@ -0,0 +1,58 @@
+/* ATTENTION: This script is run using a custom workflow step class */
+/* This accommodates the required to retain an empty table on undo */
+
+TRUNCATE TABLE :SCHEMA.PathwaysGeneTable;
+
+DO $$
+ DECLARE org record:PLPGSQL_DELIM
+ BEGIN
+ FOR org IN (SELECT DISTINCT abbrev FROM apidb.organism)
+ LOOP
+ INSERT INTO :SCHEMA.PathwaysGeneTable (
+ SELECT t2.*, current_timestamp AS modification_date FROM (
+ SELECT DISTINCT
+ gene_source_id
+ , pathway_source_id
+ , pathway_name
+ , count(reaction_source_id) AS reactions
+ , enzyme
+ , expasy_url
+ , pathway_source
+ , exact_match
+ , project_id
+ , org_abbrev
+ FROM (
+ SELECT DISTINCT
+ tp.gene_source_id
+ , tp.project_id
+ , tp.pathway_source_id
+ , tp.pathway_name
+ , tp.org_abbrev
+ , pr.reaction_source_id
+ , pr.enzyme
+ , pr.expasy_url
+ , tp.pathway_source
+ , CASE MAX(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match
+ FROM :SCHEMA.TranscriptPathway tp
+ , apidbtuning.PathwayAttributes pa
+ , apidbtuning.PathwayCompounds pc
+ , apidbtuning.PathwayReactions pr
+ WHERE tp.pathway_id = pa.pathway_id
+ AND pc.pathway_id = pa.pathway_id
+ AND pr.reaction_id = pc.reaction_id
+ AND pr.ext_db_name = pc.ext_db_name
+ AND tp.ec_number_pathway = pr.enzyme
+ AND tp.wildcard_count_gene <= tp.wildcard_count_pathway
+ AND pr.enzyme != '-.-.-.-'
+ AND tp.org_abbrev = org.abbrev
+ GROUP BY tp.gene_source_id, tp.project_id, tp.org_abbrev, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
+ ) t
+ GROUP BY gene_source_id, project_id, org_abbrev, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match
+ ) t2
+ ORDER BY pathway_source, lower(pathway_name)
+ ):PLPGSQL_DELIM
+ COMMIT:PLPGSQL_DELIM
+ END LOOP:PLPGSQL_DELIM
+ END:PLPGSQL_DELIM
+$$ LANGUAGE PLPGSQL;
+
diff --git a/Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql b/Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql
new file mode 100644
index 000000000..afd718fb4
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql
@@ -0,0 +1,174 @@
+/* ATTENTION: This script is run using a custom workflow step class */
+/* This accommodates the requirement to retain empty TranscriptEC and TranscriptPathway tables on undo */
+
+/* STEP 1: Make sure temp tables have been dropped before starting */
+
+DROP TABLE IF EXISTS :SCHEMA.TranscriptEcUniverse;
+DROP TABLE IF EXISTS :SCHEMA.PathwayEcUniverse;
+DROP TABLE IF EXISTS :SCHEMA.TranscriptPathwayEcMatch;
+
+-- Just to be safe add these here
+TRUNCATE TABLE :SCHEMA.TranscriptPathway;
+TRUNCATE TABLE :SCHEMA.TranscriptEC;
+
+
+/* STEP 2: Load into the TranscriptEC table. This must be done here so that we capture ortho-derived EC numbers*/
+
+/* ATTENTION: this step loads into an empty partitioned table created in the orgSpecific graph */
+
+DO $$
+ DECLARE org RECORD:PLPGSQL_DELIM
+ BEGIN
+ FOR org IN (SELECT DISTINCT taxon_id, abbrev from apidb.organism)
+ LOOP
+ INSERT INTO :SCHEMA.TranscriptEc (
+ SELECT DISTINCT
+ ta.source_id
+ , ta.gene_source_id
+ , ec.enzyme_class_id
+ , ec.ec_number
+ , ec.ec_number_1
+ , ec.ec_number_2
+ , ec.ec_number_3
+ , ec.ec_number_4
+ , regexp_count(ec.ec_number, '-') as wildcard_count
+ , asec.evidence_code
+ , ta.project_id
+ , org.abbrev as org_abbrev
+ , current_timestamp as modification_date
+ FROM sres.EnzymeClass ec
+ , dots.AaSequenceEnzymeClass asec
+ , :SCHEMA.transcriptattributes ta
+ WHERE asec.aa_sequence_id = ta.aa_sequence_id
+ AND asec.enzyme_class_id = ec.enzyme_class_id
+ AND ta.org_abbrev = org.abbrev
+ ):PLPGSQL_DELIM
+ COMMIT:PLPGSQL_DELIM
+ END LOOP:PLPGSQL_DELIM
+ END:PLPGSQL_DELIM
+$$ LANGUAGE PLPGSQL;
+
+
+/* STEP 3: Extracts the distinct EC numbers from TranscriptEC */
+/* This represents the "universe" of EC numbers associated to transcripts */
+/* Temp table, will be dropped */
+
+CREATE TABLE :SCHEMA.TranscriptEcUniverse as (
+ SELECT DISTINCT
+ enzyme_class_id
+ , ec_number
+ , ec_number_1
+ , ec_number_2
+ , ec_number_3
+ , ec_number_4
+ , wildcard_count
+ FROM :SCHEMA.TranscriptEc
+);
+
+/* STEP 4: Extract the distinct EC number from PathwayEC */
+/* This represents the "universe" of EC numbers associated to pathways */
+/* Temp table, will be dropped */
+
+CREATE TABLE :SCHEMA.PathwayEcUniverse as (
+ SELECT DISTINCT
+ enzyme_class_id
+ , ec_number
+ , ec_number_1
+ , ec_number_2
+ , ec_number_3
+ , ec_number_4
+ , wildcard_count
+ FROM :SCHEMA.PathwayEc
+);
+
+
+/* STEP 5: Match EC numbers from the transcript universe and EC numbers from the pathway universe */
+/* Use the universe tables to avoid redundancy */
+/* Temp table, will be dropped */
+
+CREATE TABLE :SCHEMA.TranscriptPathwayEcMatch as (
+ SELECT DISTINCT
+ teu.enzyme_class_id AS transcript_enzyme_class_id
+ , peu.enzyme_class_id AS pathway_enzyme_class_id
+ , teu.wildcard_count AS wildcard_count_transcript
+ , peu.wildcard_count AS wildcard_count_pathway
+ , teu.ec_number AS ec_number_transcript
+ , peu.ec_number AS ec_number_pathway
+ FROM :SCHEMA.TranscriptEcUniverse teu
+ , :SCHEMA.PathwayEc peu
+
+ -- this part does ec number expansion using the individual digits to avoid slow like syntax
+ WHERE (teu.ec_number_1 = peu.ec_number_1 or teu.ec_number_1 is null or peu.ec_number_1 is null)
+ AND (teu.ec_number_2 = peu.ec_number_2 or teu.ec_number_2 is null or peu.ec_number_2 is null)
+ AND (teu.ec_number_3 = peu.ec_number_3 or teu.ec_number_3 is null or peu.ec_number_3 is null)
+ AND (teu.ec_number_4 = peu.ec_number_4 or teu.ec_number_4 is null or peu.ec_number_4 is null)
+);
+
+
+/* STEP 6: Map the matched EC numbers to map back to both pathways and transcripts */
+/* ATTENTION: this step loads into an empty partitioned table created in the orgSpecific graph */
+
+/* This is the equivalent of the old TranscriptPathway tuning table */
+
+DO $$
+ DECLARE org RECORD:PLPGSQL_DELIM
+ BEGIN
+ FOR org IN (SELECT DISTINCT taxon_id, abbrev FROM apidb.organism)
+ LOOP
+ INSERT INTO :SCHEMA.transcriptpathway (
+ SELECT DISTINCT
+
+ -- gene info
+ ta.source_id
+ , ta.gene_source_id
+
+ -- pathway info
+ , pa.source_id AS pathway_source_id
+ , pa.name AS pathway_name
+ , pa.pathway_id
+ , pa.pathway_source
+ , pec.external_database_release_id
+
+ -- info about match
+ , tpem.ec_number_transcript AS ec_number_gene
+ , tpem.wildcard_count_transcript AS wildcard_count_gene
+ , tpem.ec_number_pathway
+ , tpem.wildcard_count_pathway
+ , CASE WHEN tpem.ec_number_pathway = tpem.ec_number_transcript
+ THEN 1
+ ELSE 0 END AS exact_match
+ , CASE WHEN tpem.wildcard_count_pathway + tpem.wildcard_count_transcript = 0
+ THEN 1
+ ELSE 0 END AS complete_ec
+
+ -- for partitioning
+ , ta.project_id
+ , org.abbrev AS org_abbrev
+ , current_timestamp AS modification_date
+ FROM :SCHEMA.TranscriptPathwayEcMatch tpem
+ , :SCHEMA.PathwayEc pec
+ , :SCHEMA.PathwayAttributes pa
+ , :SCHEMA.TranscriptAttributes ta
+ , :SCHEMA.TranscriptEc tec
+ WHERE tpem.ec_number_transcript = tec.ec_number
+ AND tpem.ec_number_pathway = pec.ec_number
+ AND pa.pathway_id = pec.pathway_id
+ AND ta.source_id = tec.source_id
+ AND tec.org_abbrev = org.abbrev
+ AND ta.org_abbrev = org.abbrev
+ -- JB: It is unclear what the intention here was. This will not remove any rows and shouldn't
+ -- AND (
+ -- (ta.orthomcl_name IS NULL AND tec.evidence_code != 'OrthoMCLDerived')
+ -- OR ta.orthomcl_name IS NOT NULL
+ -- )
+ ):PLPGSQL_DELIM
+ COMMIT:PLPGSQL_DELIM
+ END LOOP:PLPGSQL_DELIM
+ END:PLPGSQL_DELIM
+$$ LANGUAGE PLPGSQL;
+
+
+/* STEP 7: Delete temp tables */
+DROP TABLE :SCHEMA.TranscriptEcUniverse;
+DROP TABLE :SCHEMA.PathwayEcUniverse;
+DROP TABLE :SCHEMA.TranscriptPathwayEcMatch;
diff --git a/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql
new file mode 100644
index 000000000..983e5239c
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql
@@ -0,0 +1,48 @@
+ drop table if exists :SCHEMA.SyntenicPairs;
+
+ create UNLOGGED table :SCHEMA.SyntenicPairs as
+ select distinct ga.na_feature_id, sg.syn_na_feature_id
+ from apidb.SyntenicGene sg, :SCHEMA.GeneAttributes ga
+ where sg.na_sequence_id = ga.na_sequence_id
+ and sg.end_max >= ga.start_min
+ and sg.start_min <= ga.end_max
+
+ ;
+
+ create index SynPair_idx
+ on :SCHEMA.SyntenicPairs (na_feature_id, syn_na_feature_id)
+ ;
+
+ create table :SCHEMA.OrthologousTranscripts as
+ with all_pairs
+ as (select ga.source_id
+ , ga.project_id
+ , ga.na_feature_id
+ , ota.source_id as ortho_source_id
+ , ota.gene_source_id as ortho_gene_source_id
+ , ota.project_id as ortho_project_id
+ , ota.gene_na_feature_id as ortho_na_feature_id
+ , ota.transcript_product as ortho_product
+ , ota.protein_length
+ , ga.name as ortho_name
+ , ota.organism as ortho_organism
+ , ota.taxon_id as ortho_taxon_id
+ , o.is_reference_strain
+ from :SCHEMA.Geneattributes ga
+ , :SCHEMA.TranscriptAttributes ota
+ , apidb.Organism o
+ where ga.ORTHOMCL_NAME = ota.ORTHOMCL_NAME
+ and ota.taxon_id = o.taxon_id
+ ),
+ syn_pairs
+ as (select na_feature_id, syn_na_feature_id, 1 as is_syntenic from :SCHEMA.SyntenicPairs
+ )
+ select all_pairs.*
+ , coalesce(syn_pairs.is_syntenic, 0) as is_syntenic
+ from all_pairs
+ left join syn_pairs
+ on all_pairs.na_feature_id = syn_pairs.na_feature_id
+ and all_pairs.ortho_na_feature_id = syn_pairs.syn_na_feature_id
+ ;
+
+ drop table if exists :SCHEMA.SyntenicPairs;
diff --git a/Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql b/Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql
new file mode 100644
index 000000000..677049f72
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql
@@ -0,0 +1,12 @@
+
+ create index ot_idx
+ on :SCHEMA.OrthologousTranscripts (source_id, project_id, is_syntenic desc, ortho_source_id,
+ ortho_project_id, ortho_gene_source_id, ortho_product,
+ ortho_name, ortho_organism, ortho_taxon_id, is_reference_strain)
+ ;
+
+ create index ot_smol_idx
+ on :SCHEMA.OrthologousTranscripts (is_syntenic, ortho_taxon_id, source_id, ortho_source_id,
+ ortho_project_id, ortho_gene_source_id)
+ ;
+
diff --git a/Model/lib/psql/webready/comparative/PhyleticPattern.psql b/Model/lib/psql/webready/comparative/PhyleticPattern.psql
new file mode 100644
index 000000000..1fad7add7
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/PhyleticPattern.psql
@@ -0,0 +1,395 @@
+
+
+CREATE TABLE :SCHEMA.PhyleticPattern AS
+ (SELECT actual.group_name,
+ actual.alveolata as alveolata_actual,
+ total.alveolata as alveolata_total,
+ round(100*actual.alveolata/total.alveolata,0) AS alveolata_percent,
+ actual.archaea as archaea_actual,
+ total.archaea as archaea_total,
+ round(100*actual.archaea/total.archaea,0) AS archaea_percent,
+ actual.amoeba as amoeba_actual,
+ total.amoeba as amoeba_total,
+ round(100*actual.amoeba/total.amoeba,0) AS amoeba_percent,
+ actual.bacteria as bacteria_actual,
+ total.bacteria as bacteria_total,
+ round(100*actual.bacteria/total.bacteria,0) AS bacteria_percent,
+ actual.fungi as fungi_actual,
+ total.fungi as fungi_total,
+ round(100*actual.fungi/total.fungi,0) AS fungi_percent,
+ actual.euglenozoa as euglenozoa_actual,
+ total.euglenozoa as euglenozoa_total,
+ round(100*actual.euglenozoa/total.euglenozoa,0) AS euglenozoa_percent,
+ actual.metazoa as metazoa_actual,
+ total.metazoa as metazoa_total,
+ round(100*actual.metazoa/total.metazoa,0) AS metazoa_percent,
+ actual.viridiplantae as viridiplantae_actual,
+ total.viridiplantae as viridiplantae_total,
+ round(100*actual.viridiplantae/total.viridiplantae,0) AS viridiplantae_percent,
+ actual.other_eukaryotes as other_eukaryotes_actual,
+ total.other_eukaryotes as other_eukaryotes_total,
+ round(100*actual.other_eukaryotes/total.other_eukaryotes,0) AS other_eukaryotes_percent,
+ actual.bacteria_firm as bacteria_firm_actual,
+ total.bacteria_firm as bacteria_firm_total,
+ actual.bacteria_proa as bacteria_proa_actual,
+ total.bacteria_proa as bacteria_proa_total,
+ actual.bacteria_prob as bacteria_prob_actual,
+ total.bacteria_prob as bacteria_prob_total,
+ actual.bacteria_prod as bacteria_prod_actual,
+ total.bacteria_prod as bacteria_prod_total,
+ actual.bacteria_proe as bacteria_proe_actual,
+ total.bacteria_proe as bacteria_proe_total,
+ actual.bacteria_prog as bacteria_prog_actual,
+ total.bacteria_prog as bacteria_prog_total,
+ actual.bacteria_obac as bacteria_obac_actual,
+ total.bacteria_obac as bacteria_obac_total,
+ actual.archaea_arch as archaea_arch_actual,
+ total.archaea_arch as archaea_arch_total,
+ actual.archaea_eury as archaea_eury_actual,
+ total.archaea_eury as archaea_eury_total,
+ actual.archaea_cren as archaea_cren_actual,
+ total.archaea_cren as archaea_cren_total,
+ actual.archaea_nano as archaea_nano_actual,
+ total.archaea_nano as archaea_nano_total,
+ actual.archaea_kora as archaea_kora_actual,
+ total.archaea_kora as archaea_kora_total,
+ --oeuk_genera.html as oeuk_genera_html,
+ --eugl_genera.html as eugl_genera_html,
+ --amoe_genera.html as amoe_genera_html,
+ actual.alveolata_alve as alveolata_alve_actual,
+ total.alveolata_alve as alveolata_alve_total,
+ actual.alveolata_cili as alveolata_cili_actual,
+ total.alveolata_cili as alveolata_cili_total,
+ actual.alveolata_apic as alveolata_apic_actual,
+ total.alveolata_apic as alveolata_apic_total,
+ actual.alveolata_cocc as alveolata_cocc_actual,
+ total.alveolata_cocc as alveolata_cocc_total,
+ actual.alveolata_haem as alveolata_haem_actual,
+ total.alveolata_haem as alveolata_haem_total,
+ actual.alveolata_piro as alveolata_piro_actual,
+ total.alveolata_piro as alveolata_piro_total,
+ actual.viridiplantae_stre as viridiplantae_stre_actual,
+ total.viridiplantae_stre as viridiplantae_stre_total,
+ actual.viridiplantae_chlo as viridiplantae_chlo_actual,
+ total.viridiplantae_chlo as viridiplantae_chlo_total,
+ actual.viridiplantae_rhod as viridiplantae_rhod_actual,
+ total.viridiplantae_rhod as viridiplantae_rhod_total,
+ actual.viridiplantae_cryp as viridiplantae_cryp_actual,
+ total.viridiplantae_cryp as viridiplantae_cryp_total,
+ actual.fungi_fung as fungi_fung_actual,
+ total.fungi_fung as fungi_fung_total,
+ actual.fungi_micr as fungi_micr_actual,
+ total.fungi_micr as fungi_micr_total,
+ actual.fungi_basi as fungi_basi_actual,
+ total.fungi_basi as fungi_basi_total,
+ actual.fungi_asco as fungi_asco_actual,
+ total.fungi_asco as fungi_asco_total,
+ actual.fungi_muco as fungi_muco_actual,
+ total.fungi_muco as fungi_muco_total,
+ actual.fungi_chyt as fungi_chyt_actual,
+ total.fungi_chyt as fungi_chyt_total,
+ actual.metazoa_omet as metazoa_omet_actual,
+ total.metazoa_omet as metazoa_omet_total,
+ actual.metazoa_nema as metazoa_nema_actual,
+ total.metazoa_nema as metazoa_nema_total,
+ actual.metazoa_arth as metazoa_arth_actual,
+ total.metazoa_arth as metazoa_arth_total,
+ actual.metazoa_chor as metazoa_chor_actual,
+ total.metazoa_chor as metazoa_chor_total,
+ actual.metazoa_acti as metazoa_acti_actual,
+ total.metazoa_acti as metazoa_acti_total,
+ actual.metazoa_aves as metazoa_aves_actual,
+ total.metazoa_aves as metazoa_aves_total,
+ actual.metazoa_mamm as metazoa_mamm_actual,
+ total.metazoa_mamm as metazoa_mamm_total,
+ actual.metazoa_tuni as metazoa_tuni_actual,
+ total.metazoa_tuni as metazoa_tuni_total
+ FROM
+ (SELECT SUM(CASE clade WHEN 'alveolata' THEN num ELSE 0 END) as alveolata,
+ SUM(CASE clade WHEN 'archaea' THEN num ELSE 0 END) as archaea,
+ SUM(CASE clade WHEN 'amoeba' THEN num ELSE 0 END) as amoeba,
+ SUM(CASE clade WHEN 'bacteria' THEN num ELSE 0 END) as bacteria,
+ SUM(CASE clade WHEN 'fungi' THEN num ELSE 0 END) as fungi,
+ SUM(CASE clade WHEN 'euglenozoa' THEN num ELSE 0 END) as euglenozoa,
+ SUM(CASE clade WHEN 'metazoa' THEN num ELSE 0 END) as metazoa,
+ SUM(CASE clade WHEN 'viridiplantae' THEN num ELSE 0 END) as viridiplantae,
+ SUM(CASE clade WHEN 'other_eukaryotes' THEN num ELSE 0 END) as other_eukaryotes,
+ SUM(CASE clade WHEN 'FIRM' THEN num ELSE 0 END) as bacteria_firm,
+ SUM(CASE clade WHEN 'PROA' THEN num ELSE 0 END) as bacteria_proa,
+ SUM(CASE clade WHEN 'PROB' THEN num ELSE 0 END) as bacteria_prob,
+ SUM(CASE clade WHEN 'PROD' THEN num ELSE 0 END) as bacteria_prod,
+ SUM(CASE clade WHEN 'PROE' THEN num ELSE 0 END) as bacteria_proe,
+ SUM(CASE clade WHEN 'PROG' THEN num ELSE 0 END) as bacteria_prog,
+ SUM(CASE clade WHEN 'OBAC' THEN num ELSE 0 END) as bacteria_obac,
+ SUM(CASE clade WHEN 'ARCH' THEN num ELSE 0 END) as archaea_arch,
+ SUM(CASE clade WHEN 'EURY' THEN num ELSE 0 END) as archaea_eury,
+ SUM(CASE clade WHEN 'CREN' THEN num ELSE 0 END) as archaea_cren,
+ SUM(CASE clade WHEN 'NANO' THEN num ELSE 0 END) as archaea_nano,
+ SUM(CASE clade WHEN 'KORA' THEN num ELSE 0 END) as archaea_kora,
+ SUM(CASE clade WHEN 'ALVE' THEN num ELSE 0 END) as alveolata_alve,
+ SUM(CASE clade WHEN 'CILI' THEN num ELSE 0 END) as alveolata_cili,
+ SUM(CASE clade WHEN 'APIC' THEN num ELSE 0 END) as alveolata_apic,
+ SUM(CASE clade WHEN 'COCC' THEN num ELSE 0 END) as alveolata_cocc,
+ SUM(CASE clade WHEN 'HAEM' THEN num ELSE 0 END) as alveolata_haem,
+ SUM(CASE clade WHEN 'PIRO' THEN num ELSE 0 END) as alveolata_piro,
+ SUM(CASE clade WHEN 'STRE' THEN num ELSE 0 END) as viridiplantae_stre,
+ SUM(CASE clade WHEN 'CHLO' THEN num ELSE 0 END) as viridiplantae_chlo,
+ SUM(CASE clade WHEN 'RHOD' THEN num ELSE 0 END) as viridiplantae_rhod,
+ SUM(CASE clade WHEN 'CRYP' THEN num ELSE 0 END) as viridiplantae_cryp,
+ SUM(CASE clade WHEN 'FUNG' THEN num ELSE 0 END) as fungi_fung,
+ SUM(CASE clade WHEN 'MICR' THEN num ELSE 0 END) as fungi_micr,
+ SUM(CASE clade WHEN 'BASI' THEN num ELSE 0 END) as fungi_basi,
+ SUM(CASE clade WHEN 'ASCO' THEN num ELSE 0 END) as fungi_asco,
+ SUM(CASE clade WHEN 'MUCO' THEN num ELSE 0 END) as fungi_muco,
+ SUM(CASE clade WHEN 'CHYT' THEN num ELSE 0 END) as fungi_chyt,
+ SUM(CASE clade WHEN 'OMET' THEN num ELSE 0 END) as metazoa_omet,
+ SUM(CASE clade WHEN 'NEMA' THEN num ELSE 0 END) as metazoa_nema,
+ SUM(CASE clade WHEN 'ARTH' THEN num ELSE 0 END) as metazoa_arth,
+ SUM(CASE clade WHEN 'CHOR' THEN num ELSE 0 END) as metazoa_chor,
+ SUM(CASE clade WHEN 'ACTI' THEN num ELSE 0 END) as metazoa_acti,
+ SUM(CASE clade WHEN 'AVES' THEN num ELSE 0 END) as metazoa_aves,
+ SUM(CASE clade WHEN 'MAMM' THEN num ELSE 0 END) as metazoa_mamm,
+ SUM(CASE clade WHEN 'TUNI' THEN num ELSE 0 END) as metazoa_tuni
+ FROM (
+ (WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'BACT'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'bacteria' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'ARCH'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'archaea' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'ALVE'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'alveolata' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'AMOE'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'amoeba' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'EUGL'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'euglenozoa' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'VIRI'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'viridiplantae' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'FUNG'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'fungi' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'META'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'metazoa' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'OEUK'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'other_eukaryotes' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ SELECT b.three_letter_abbrev AS clade,a.num
+ FROM (SELECT parent_id, COUNT(orthomcl_clade_id) AS num
+ FROM apidb.OrthomclClade
+ WHERE core_peripheral in ('C','P')
+ GROUP BY parent_id) a,
+ apidb.OrthomclClade b
+ WHERE a.parent_id = b.orthomcl_clade_id
+ )
+ )) total,
+ (SELECT name as group_name,
+ SUM(CASE three_letter_abbrev WHEN 'ALVE' THEN number_of_taxa ELSE 0 END) as alveolata,
+ SUM(CASE three_letter_abbrev WHEN 'ARCH' THEN number_of_taxa ELSE 0 END) as archaea,
+ SUM(CASE three_letter_abbrev WHEN 'AMOE' THEN number_of_taxa ELSE 0 END) as amoeba,
+ SUM(CASE three_letter_abbrev WHEN 'BACT' THEN number_of_taxa ELSE 0 END) as bacteria,
+ SUM(CASE three_letter_abbrev WHEN 'FUNG' THEN number_of_taxa ELSE 0 END) as fungi,
+ SUM(CASE three_letter_abbrev WHEN 'EUGL' THEN number_of_taxa ELSE 0 END) as euglenozoa,
+ SUM(CASE three_letter_abbrev WHEN 'META' THEN number_of_taxa ELSE 0 END) as metazoa,
+ SUM(CASE three_letter_abbrev WHEN 'VIRI' THEN number_of_taxa ELSE 0 END) as viridiplantae,
+ SUM(CASE three_letter_abbrev WHEN 'OEUK' THEN number_of_taxa ELSE 0 END) as other_eukaryotes,
+ SUM(CASE three_letter_abbrev WHEN 'FIRM' THEN number_of_taxa ELSE 0 END) as bacteria_firm,
+ SUM(CASE three_letter_abbrev WHEN 'PROA' THEN number_of_taxa ELSE 0 END) as bacteria_proa,
+ SUM(CASE three_letter_abbrev WHEN 'PROB' THEN number_of_taxa ELSE 0 END) as bacteria_prob,
+ SUM(CASE three_letter_abbrev WHEN 'PROD' THEN number_of_taxa ELSE 0 END) as bacteria_prod,
+ SUM(CASE three_letter_abbrev WHEN 'PROE' THEN number_of_taxa ELSE 0 END) as bacteria_proe,
+ SUM(CASE three_letter_abbrev WHEN 'PROG' THEN number_of_taxa ELSE 0 END) as bacteria_prog,
+ SUM(CASE three_letter_abbrev WHEN 'OBAC' THEN number_of_taxa ELSE 0 END) as bacteria_obac,
+ SUM(CASE three_letter_abbrev WHEN 'ARCH' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'EURY' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'CREN' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'NANO' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'KORA' THEN number_of_taxa ELSE 0 END) as archaea_arch,
+ SUM(CASE three_letter_abbrev WHEN 'EURY' THEN number_of_taxa ELSE 0 END) as archaea_eury,
+ SUM(CASE three_letter_abbrev WHEN 'CREN' THEN number_of_taxa ELSE 0 END) as archaea_cren,
+ SUM(CASE three_letter_abbrev WHEN 'NANO' THEN number_of_taxa ELSE 0 END) as archaea_nano,
+ SUM(CASE three_letter_abbrev WHEN 'KORA' THEN number_of_taxa ELSE 0 END) as archaea_kora,
+ SUM(CASE three_letter_abbrev WHEN 'ALVE' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'CILI' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'APIC' THEN number_of_taxa ELSE 0 END) as alveolata_alve,
+ SUM(CASE three_letter_abbrev WHEN 'CILI' THEN number_of_taxa ELSE 0 END) as alveolata_cili,
+ SUM(CASE three_letter_abbrev WHEN 'APIC' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'COCC' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'ACON' THEN number_of_taxa ELSE 0 END) as alveolata_apic,
+ SUM(CASE three_letter_abbrev WHEN 'COCC' THEN number_of_taxa ELSE 0 END) as alveolata_cocc,
+ SUM(CASE three_letter_abbrev WHEN 'HAEM' THEN number_of_taxa ELSE 0 END) as alveolata_haem,
+ SUM(CASE three_letter_abbrev WHEN 'PIRO' THEN number_of_taxa ELSE 0 END) as alveolata_piro,
+ SUM(CASE three_letter_abbrev WHEN 'STRE' THEN number_of_taxa ELSE 0 END) as viridiplantae_stre,
+ SUM(CASE three_letter_abbrev WHEN 'CHLO' THEN number_of_taxa ELSE 0 END) as viridiplantae_chlo,
+ SUM(CASE three_letter_abbrev WHEN 'RHOD' THEN number_of_taxa ELSE 0 END) as viridiplantae_rhod,
+ SUM(CASE three_letter_abbrev WHEN 'CRYP' THEN number_of_taxa ELSE 0 END) as viridiplantae_cryp,
+ SUM(CASE three_letter_abbrev WHEN 'FUNG' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'MICR' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'BASI' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'ASCO' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'MUCO' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'CHYT' THEN number_of_taxa ELSE 0 END) as fungi_fung,
+ SUM(CASE three_letter_abbrev WHEN 'MICR' THEN number_of_taxa ELSE 0 END) as fungi_micr,
+ SUM(CASE three_letter_abbrev WHEN 'BASI' THEN number_of_taxa ELSE 0 END) as fungi_basi,
+ SUM(CASE three_letter_abbrev WHEN 'ASCO' THEN number_of_taxa ELSE 0 END) as fungi_asco,
+ SUM(CASE three_letter_abbrev WHEN 'MUCO' THEN number_of_taxa ELSE 0 END) as fungi_muco,
+ SUM(CASE three_letter_abbrev WHEN 'CHYT' THEN number_of_taxa ELSE 0 END) as fungi_chyt,
+ SUM(CASE three_letter_abbrev WHEN 'OMET' THEN number_of_taxa ELSE 0 END) as metazoa_omet,
+ SUM(CASE three_letter_abbrev WHEN 'NEMA' THEN number_of_taxa ELSE 0 END) as metazoa_nema,
+ SUM(CASE three_letter_abbrev WHEN 'ARTH' THEN number_of_taxa ELSE 0 END) as metazoa_arth,
+ SUM(CASE three_letter_abbrev WHEN 'CHOR' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'ACTI' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'AVES' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'MAMM' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'TUNI' THEN number_of_taxa ELSE 0 END) as metazoa_chor,
+ SUM(CASE three_letter_abbrev WHEN 'ACTI' THEN number_of_taxa ELSE 0 END) as metazoa_acti,
+ SUM(CASE three_letter_abbrev WHEN 'AVES' THEN number_of_taxa ELSE 0 END) as metazoa_aves,
+ SUM(CASE three_letter_abbrev WHEN 'MAMM' THEN number_of_taxa ELSE 0 END) as metazoa_mamm,
+ SUM(CASE three_letter_abbrev WHEN 'TUNI' THEN number_of_taxa ELSE 0 END) as metazoa_tuni
+ FROM (SELECT og.group_id AS name,agt.three_letter_abbrev,agt.number_of_taxa::numeric
+ FROM ApiDB.OrthologGroupTaxon agt, apidb.orthologgroup og
+ WHERE agt.group_id = og.group_id
+ AND agt.three_letter_abbrev = UPPER(agt.three_letter_abbrev))
+ GROUP BY name) actual
+)
+
+ ;
+
diff --git a/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql b/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql
new file mode 100644
index 000000000..5ba8d0129
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql
@@ -0,0 +1,38 @@
+ create table :SCHEMA.ProteinDomainAssignment as
+ select sa.full_id, sa.group_name,
+ r.interpro_primary_id as accession,
+ r.interpro_desc as description,
+ CAST (NULL as NUMERIC) as domain_index,
+ sa.aa_sequence_id,
+ r.interpro_start_min as start_min,
+ r.interpro_end_min as end_max
+ from :SCHEMA.ProteinSequenceGroup sa, apidb.interproresults r
+ where sa.full_id = r.protein_source_id
+ and upper(r.interpro_db_name) = 'PFAM'
+;
+
+ create index domain_accession_ix
+ on :SCHEMA.ProteinDomainAssignment (accession, full_id, group_name)
+ ;
+
+ drop table if exists :SCHEMA.domainIndex_tmp
+;
+
+ create table :SCHEMA.domainIndex_tmp as
+ select row_number() OVER () as domain_index, accession
+ from (select distinct accession
+ from :SCHEMA.ProteinDomainAssignment
+ order by accession)
+;
+
+ create index domainIdxIdx on :SCHEMA.DomainIndex_tmp(accession, domain_index)
+;
+
+ update :SCHEMA.ProteinDomainAssignment da
+ set domain_index = (select domain_index
+ from :SCHEMA.DomainIndex_tmp
+ where accession = da.accession)
+;
+
+ drop table :SCHEMA.domainIndex_tmp
+ ;
\ No newline at end of file
diff --git a/Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql b/Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql
new file mode 100644
index 000000000..7fe3cdc3c
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql
@@ -0,0 +1,11 @@
+ create index domain_ix_ix
+ on :SCHEMA.ProteinDomainAssignment (domain_index, accession, full_id)
+;
+
+ create index domain_group_ix
+ on :SCHEMA.ProteinDomainAssignment (group_name, accession, full_id)
+;
+
+ create index domain_seq_ix
+ on :SCHEMA.ProteinDomainAssignment (aa_sequence_id, accession, full_id, group_name)
+;
\ No newline at end of file
diff --git a/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql b/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql
new file mode 100644
index 000000000..de2df5a82
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql
@@ -0,0 +1,92 @@
+ create table :SCHEMA.ProteinSequenceGroup as
+ SELECT
+ distinct(aas.source_id) AS full_id,
+ aas.source_id,
+ aas.aa_sequence_id,
+ length(aas.sequence) as length,
+ aas.description AS product,
+ aas.taxon_id,
+
+ taxon.orthomcl_taxon_id,
+ taxon.taxon_group,
+ taxon.orthomcl_abbrev AS taxon_abbreviation,
+ taxon.name AS organism_name,
+ taxon.core_peripheral,
+
+ o.group_id AS group_name,
+ o.ortholog_group_id,
+ o.number_of_members AS group_size,
+ o.number_of_core_members,
+ o.number_of_peripheral_members,
+ CASE is_residual WHEN 1 THEN 'Residual'
+ ELSE 'Core' END AS group_type,
+ urls.source_url,
+ urls.source_text
+ FROM
+ dots.AASequence aas,
+ apidb.orthologGroup o,
+ apidb.orthologGroupAASequence ogseq,
+ (
+ SELECT o.orthomcl_abbrev,
+ o.taxon_id as orthomcl_taxon_id,
+ t.name,
+ t.core_peripheral,
+ t.taxon_group
+ FROM apidb.organism o,
+ (WITH RECURSIVE TaxonHierarchy AS (
+ SELECT
+ three_letter_abbrev,
+ orthomcl_clade_id,
+ name,
+ core_peripheral,
+ name AS taxon_group,
+ parent_id
+ FROM apidb.OrthomclClade
+ WHERE name IN ('Archaea', 'Bacteria', 'Alveolates', 'Amoebozoa', 'Euglenozoa',
+ 'Fungi', 'Metazoa', 'Other Eukaryota', 'Viridiplantae')
+ UNION ALL
+ SELECT
+ child.three_letter_abbrev,
+ child.orthomcl_clade_id,
+ child.name,
+ child.core_peripheral,
+ parent.taxon_group,
+ child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT three_letter_abbrev, taxon_group, name, core_peripheral
+ FROM TaxonHierarchy
+ WHERE core_peripheral IN ('C', 'P')
+ ) t
+ WHERE t.three_letter_abbrev = o.orthomcl_abbrev
+ ) taxon,
+
+ (
+ SELECT aas.aa_sequence_id,
+ CASE
+ WHEN ores.resource_name IN ('AmoebaDB','CryptoDB','FungiDB','GiardiaDB','HostDB','MicrosporidiaDB',
+ 'PlasmoDB','PiroplasmaDB','ToxoDB','TrichDB','TriTrypDB','VectorBase')
+ THEN SUBSTR(ores.resource_url, 0, strpos(ores.resource_url, '/downloads')) || 'record/gene/'
+ || aas.source_id
+ WHEN ores.resource_name = 'Uniprot'
+ THEN SUBSTR(ores.resource_url, 0, strpos(ores.resource_url, '/proteomes') ) || 'uniprot/'
+ || aas.source_id
+ ELSE '' END AS source_url,
+ CASE WHEN ores.resource_name IS NULL THEN ''
+ ELSE aas.source_id || ' (' || ores.resource_name || ')' END AS source_text
+ FROM dots.AaSequence aas,
+ apidb.organism ot,
+ apidb.orthomclresource ores
+ WHERE ot.taxon_id = ores.orthomcl_taxon_id
+ AND ot.taxon_id = aas.taxon_id) urls
+ WHERE aas.aa_sequence_id = ogseq.aa_sequence_id
+ AND ogseq.group_id = o.group_id
+ AND aas.aa_sequence_id = urls.aa_sequence_id
+ AND taxon.orthomcl_taxon_id = aas.taxon_id
+ AND aas.taxon_id in (select distinct(eas.taxon_id) from apidb.organism og, dots.aasequence eas where eas.taxon_id = og.taxon_id)
+;
+
+ alter table :SCHEMA.ProteinSequenceGroup
+ add constraint SeqAttrs_pk primary key (full_id)
+;
\ No newline at end of file
diff --git a/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.psql b/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.psql
new file mode 100644
index 000000000..4a136d10f
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.psql
@@ -0,0 +1,15 @@
+ create unique index PSG_idx ON :SCHEMA.ProteinSequenceGroup (full_id, group_name, taxon_id, source_id)
+;
+
+ create unique index PSG_gusIdx ON :SCHEMA.ProteinSequenceGroup (ortholog_group_id, aa_sequence_id)
+;
+
+ create unique index PSG_idx2 ON :SCHEMA.ProteinSequenceGroup (group_name, length desc, full_id, taxon_id)
+;
+
+ create unique index PSG_idx3
+ on :SCHEMA.ProteinSequenceGroup (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id)
+ ;
+
+ create unique index PSG_idx4 ON :SCHEMA.ProteinSequenceGroup (source_id, full_id, group_name, taxon_id)
+;
diff --git a/Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql b/Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql
new file mode 100644
index 000000000..9a50c36b6
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql
@@ -0,0 +1,4 @@
+/* This script truncates the tables loaded by LoadTranscriptPathway */
+/* The table is not dropped and the partitions are retained */
+Truncate table :SCHEMA.GeneOrthologGroup;
+Truncate table :SCHEMA.TranscriptOrthologGroup;
diff --git a/Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql b/Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql
new file mode 100644
index 000000000..6c838ced5
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql
@@ -0,0 +1,3 @@
+/* This script truncates the tables loaded by LoadPathwaysGeneTable */
+
+TRUNCATE TABLE :SCHEMA.PathwaysGeneTable;
diff --git a/Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql b/Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql
new file mode 100644
index 000000000..329c22d30
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql
@@ -0,0 +1,5 @@
+/* This script truncates the tables loaded by LoadTranscriptPathway */
+/* The table is not dropped and the partitions are retained */
+
+TRUNCATE TABLE :SCHEMA.TranscriptPathway;
+TRUNCATE TABLE :SCHEMA.TranscriptEC;
diff --git a/Model/lib/psql/webready/global/CompoundAttributes.psql b/Model/lib/psql/webready/global/CompoundAttributes.psql
new file mode 100644
index 000000000..aa622e1f3
--- /dev/null
+++ b/Model/lib/psql/webready/global/CompoundAttributes.psql
@@ -0,0 +1,19 @@
+ CREATE TABLE :SCHEMA.CompoundAttributes AS
+ SELECT p.ID
+ , p.source_id
+ , p.compound_name
+ , string_agg(childc.other_names, ';' ORDER BY childc.other_names) AS other_names
+ , string_agg(childc.iupac_name, ';' ORDER BY childc.iupac_name) AS iupac_name
+ , string_agg(childc.syn, ';' ORDER BY childc.syn) AS syn
+ , p.definition
+ , p.secondary_ids
+ , string_agg(childc.formula, ';' ORDER BY childc.formula) AS formula
+ , avg(childc.mass::numeric) AS mass
+ FROM :SCHEMA.CompoundProperties p
+ , (SELECT id, parent_id, other_names, iupac_name, syn, mass, formula FROM :SCHEMA.CompoundProperties ) childc
+ WHERE p.parent_id IS NULL
+ AND ( p.ID = childc.parent_id OR p.ID = childc.ID )
+ GROUP BY p.ID, p.source_id, p.compound_name, p.definition, p.secondary_ids
+ ;
+
+
diff --git a/Model/lib/psql/webready/global/CompoundAttributes_ix.psql b/Model/lib/psql/webready/global/CompoundAttributes_ix.psql
new file mode 100644
index 000000000..2cdd87d86
--- /dev/null
+++ b/Model/lib/psql/webready/global/CompoundAttributes_ix.psql
@@ -0,0 +1,2 @@
+ CREATE INDEX CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id)
+ ;
diff --git a/Model/lib/psql/webready/global/CompoundId.psql b/Model/lib/psql/webready/global/CompoundId.psql
new file mode 100644
index 000000000..9cd80c05a
--- /dev/null
+++ b/Model/lib/psql/webready/global/CompoundId.psql
@@ -0,0 +1,30 @@
+ CREATE TABLE :SCHEMA.CompoundId AS
+ SELECT source_id AS id, source_id AS compound, 'same ID' AS type, '' as source
+ FROM :SCHEMA.CompoundAttributes
+ UNION
+ SELECT p.source_id AS id, ca.source_id AS compound, 'child ID' AS type, '' as source
+ FROM :SCHEMA.CompoundAttributes ca, :SCHEMA.CompoundProperties p
+ WHERE ca.id = p.parent_id
+ UNION
+ SELECT da.accession_number AS id, p.source_id AS compound, 'KEGG' AS type, '' as source
+ FROM chebi.database_accession da, :SCHEMA.CompoundAttributes p
+ WHERE da.type='KEGG COMPOUND accession'
+ AND da.compound_id = p.id
+ UNION
+ SELECT distinct da.accession_number AS id, p.chebi_accession AS compound, 'KEGG' as type, '' as source
+ FROM chebi.database_accession da, chebi.compounds c, chebi.compounds p
+ WHERE NOT p.status in ('D', 'F') AND da.type='KEGG COMPOUND accession'
+ AND da.compound_id = c.id AND c.parent_id=p.id
+ UNION
+ SELECT n.name as id, ca.source_id as compound, 'name' as type, n.source
+ FROM :SCHEMA.CompoundAttributes ca, chebi.names n
+ WHERE ca.id = n.compound_id
+ AND n.type = 'NAME'
+ UNION
+ SELECT n.name as id, ca.source_id as compound, 'synonym' as type, n.source
+ FROM :SCHEMA.CompoundAttributes ca, chebi.names n
+ WHERE ca.id = n.compound_id
+ AND n.type = 'SYNONYM'
+ ;
+
+
diff --git a/Model/lib/psql/webready/global/CompoundId_ix.psql b/Model/lib/psql/webready/global/CompoundId_ix.psql
new file mode 100644
index 000000000..20a155ef6
--- /dev/null
+++ b/Model/lib/psql/webready/global/CompoundId_ix.psql
@@ -0,0 +1,2 @@
+ CREATE INDEX CompoundId_idx ON :SCHEMA.CompoundId (id, compound)
+ ;
diff --git a/Model/lib/psql/webready/global/CompoundProperties.psql b/Model/lib/psql/webready/global/CompoundProperties.psql
new file mode 100644
index 000000000..5faddf835
--- /dev/null
+++ b/Model/lib/psql/webready/global/CompoundProperties.psql
@@ -0,0 +1,26 @@
+ CREATE TABLE :SCHEMA.CompoundProperties AS
+ SELECT c.ID, c.chebi_accession AS source_id, c.parent_id,
+ c.name AS compound_name,
+ substr(string_agg(cn.name, ';'), 1, 1000) AS other_names,
+ substr(string_agg(ciup.iupac_name, ';'), 1, 1000) AS iupac_name,
+ substr(string_agg(csyn.syn, ';'), 1, 1000) AS syn,
+ c.definition, m.mass,
+ string_agg(formu.formula, ';' order by formu.formula) AS formula,
+ string_agg(sec.chebi_accession, ';' order by sec.chebi_accession) AS secondary_ids
+ FROM chebi.compounds c
+ LEFT JOIN ( SELECT compound_id, NAME FROM chebi.names WHERE TYPE='NAME')
+ cn ON c.ID = cn.compound_id
+ LEFT JOIN ( SELECT compound_id, MIN(NAME) AS iupac_name FROM chebi.names WHERE TYPE='IUPAC NAME' GROUP BY compound_id)
+ ciup ON c.ID = ciup.compound_id
+ LEFT JOIN ( SELECT compound_id, MIN(NAME) AS syn FROM chebi.names WHERE type='SYNONYM' GROUP BY compound_id)
+ csyn ON c.ID = csyn.compound_id
+ LEFT JOIN ( SELECT compound_id, chemical_data AS formula FROM chebi.chemical_data WHERE TYPE='FORMULA')
+ formu ON c.ID = formu.compound_id
+ LEFT JOIN ( SELECT compound_id, chemical_data AS mass FROM chebi.chemical_data WHERE TYPE='MASS' and chemical_data != 'NaN')
+ m ON c.ID = m.compound_id
+ LEFT JOIN ( SELECT parent_id, chebi_accession FROM chebi.compounds)
+ sec ON c.ID = sec.parent_id
+ WHERE NOT c.status in ('D', 'F')
+ GROUP BY c.ID, c.chebi_accession, c.parent_id, c.name, c.definition, m.mass
+ ;
+
diff --git a/Model/lib/psql/webready/global/CompoundProperties_ix.psql b/Model/lib/psql/webready/global/CompoundProperties_ix.psql
new file mode 100644
index 000000000..5943ec226
--- /dev/null
+++ b/Model/lib/psql/webready/global/CompoundProperties_ix.psql
@@ -0,0 +1,3 @@
+CREATE INDEX CompoundProperties_idx1 ON :SCHEMA.CompoundProperties (id);
+
+ CREATE INDEX CompoundProperties_idx2 ON :SCHEMA.CompoundProperties (parent_id);
\ No newline at end of file
diff --git a/Model/lib/psql/webready/global/CompoundTypeAheads.psql b/Model/lib/psql/webready/global/CompoundTypeAheads.psql
new file mode 100644
index 000000000..dfefb18f9
--- /dev/null
+++ b/Model/lib/psql/webready/global/CompoundTypeAheads.psql
@@ -0,0 +1,12 @@
+ CREATE TABLE :SCHEMA.CompoundTypeAheads AS
+ SELECT ca.source_id AS compound_id,
+ ca.source_id || ' (' || ca.compound_name || ')' AS display
+ FROM :SCHEMA.CompoundAttributes ca, :SCHEMA.PathwayCompounds pc
+ WHERE pc.chebi_accession = ca.source_id
+ UNION
+ SELECT ca.source_id AS compound_id,
+ pc.compound_source_id || ' (' || ca.compound_name || ')' AS display
+ FROM :SCHEMA.CompoundAttributes ca, :SCHEMA.PathwayCompounds pc
+ WHERE pc.chebi_accession = ca.source_id
+ ;
+
diff --git a/Model/lib/psql/webready/global/OntologyLevels.psql b/Model/lib/psql/webready/global/OntologyLevels.psql
new file mode 100644
index 000000000..2a80ec0fb
--- /dev/null
+++ b/Model/lib/psql/webready/global/OntologyLevels.psql
@@ -0,0 +1,38 @@
+ DROP TABLE IF EXISTS :SCHEMA.Is_a_links;
+
+ CREATE UNLOGGED TABLE :SCHEMA.Is_a_links AS
+ SELECT subject_term_id, object_term_id
+ FROM sres.OntologyRelationship rel, sres.OntologyTerm pred
+ WHERE rel.predicate_term_id = pred.ontology_term_id
+ AND pred.name = 'is_a'
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.Roots;
+
+ CREATE UNLOGGED TABLE :SCHEMA.Roots AS
+ SELECT object_term_id FROM :SCHEMA.is_a_links
+ EXCEPT
+ SELECT subject_term_id FROM :SCHEMA.is_a_links
+ ;
+
+ CREATE TABLE :SCHEMA.OntologyLevels as
+ WITH RECURSIVE levels(ontology_term_id, depth) AS (
+ SELECT object_term_id, 1 as depth FROM :SCHEMA.Roots
+ UNION
+ SELECT :SCHEMA.is_a_links.subject_term_id, levels.depth + 1 as depth
+ FROM :SCHEMA.Is_a_links, levels
+ WHERE :SCHEMA.is_a_links.object_term_id = levels.ontology_term_id
+ )
+ SELECT ontology_term_id, min(depth) as min_depth, max(depth) as max_depth
+ FROM (
+ SELECT ontology_term_id, depth
+ FROM levels
+ WHERE ontology_term_id NOT IN (SELECT object_term_id FROM :SCHEMA.Roots)
+ UNION
+ SELECT object_term_id, 0 FROM :SCHEMA.Roots
+ ) t
+ GROUP BY ontology_term_id
+ ;
+
+drop table :SCHEMA.Is_a_links;
+drop table :SCHEMA.Roots;
diff --git a/Model/lib/psql/webready/global/OntologyLevels_ix.psql b/Model/lib/psql/webready/global/OntologyLevels_ix.psql
new file mode 100644
index 000000000..5f95d3889
--- /dev/null
+++ b/Model/lib/psql/webready/global/OntologyLevels_ix.psql
@@ -0,0 +1,2 @@
+ create index olev_termix on :SCHEMA.OntologyLevels (ontology_term_id, min_depth, max_depth)
+ ;
diff --git a/Model/lib/psql/webready/global/PathwayAttributes.psql b/Model/lib/psql/webready/global/PathwayAttributes.psql
new file mode 100644
index 000000000..5c8567fff
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayAttributes.psql
@@ -0,0 +1,52 @@
+ drop table if exists :SCHEMA.PathwayAttributes;
+
+ CREATE TABLE :SCHEMA.PathwayAttributes as
+ SELECT
+ p.source_id
+ , p.pathway_id
+ , p.name
+ , enz.total_enzyme_count
+ , cpd.total_compound_count
+ , p.url
+ , replace(replace(ed.name, 'Pathways_', ''), '_RSRC', '') as pathway_source
+ , ed.name as external_db_name
+ , edr.version as external_db_version
+ FROM
+ sres.pathway p
+ , sres.externalDatabase ed
+ , sres.externalDatabaseRelease edr
+ ,(SELECT
+ COUNT( *) AS total_compound_count
+ , pathway_id
+ FROM
+ sres.pathwayNode pn
+ , SRES.ontologyterm ot
+ WHERE
+ pn.pathway_node_type_id = ot.ontology_term_id
+ AND ot.name = 'molecular entity'
+ GROUP BY
+ pathway_id
+ ) cpd
+ ,(SELECT
+ COUNT( *) AS total_enzyme_count
+ , pathway_id
+ FROM
+ sres.pathwayNode pn
+ , SRES.ontologyterm ot
+ WHERE
+ pn.pathway_node_type_id = ot.ontology_term_id
+ AND ot.name = 'enzyme'
+ GROUP BY
+ pathway_id
+ ) enz
+ WHERE
+ ed.external_database_id = edr.external_database_id
+ AND edr.external_database_release_id = p.external_database_release_id
+ AND cpd.pathway_id = p.pathway_id
+ AND enz.pathway_id = p.pathway_id
+ AND source_id NOT IN('ec01100', 'ec01110', 'ec01120')
+ -- temporarily remove MPMP from release 46
+ AND ed.name NOT LIKE '%MPMP%'
+ ;
+
+
diff --git a/Model/lib/psql/webready/global/PathwayAttributes_ix.psql b/Model/lib/psql/webready/global/PathwayAttributes_ix.psql
new file mode 100644
index 000000000..492ee6882
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayAttributes_ix.psql
@@ -0,0 +1,7 @@
+ CREATE UNIQUE index PathAttr_sourceId_pwaySrc
+ ON :SCHEMA.PathwayAttributes (source_id, pathway_source)
+ ;
+
+ create index PathAttr_ix
+ on :SCHEMA.PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count)
+ ;
diff --git a/Model/lib/psql/webready/global/PathwayCompounds.psql b/Model/lib/psql/webready/global/PathwayCompounds.psql
new file mode 100644
index 000000000..68fcc8290
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayCompounds.psql
@@ -0,0 +1,81 @@
+ CREATE TABLE :SCHEMA.PathwayCompounds AS
+ SELECT
+ pathway_id
+ , reaction_id
+ , ext_db_name
+ , ext_db_version
+ , compound_node_id
+ , compound_source_id
+ , c.chebi_accession
+ , case when c.chebi_accession is not null then 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=' || c.chebi_accession else null end as chebi_url
+ , type
+ FROM (
+ SELECT
+ p.PATHWAY_ID
+ , prx.PATHWAY_REACTION_ID as reaction_id
+ , ed.NAME as ext_db_name
+ , edr.version as ext_db_version
+ , pn.pathway_node_id as compound_node_id
+ , pn.DISPLAY_LABEL as compound_source_id
+ , 'substrate' as type
+ , pn.row_id
+ FROM
+ APIDB.PATHWAYREACTION prx
+ , SRES.PATHWAY p
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYNODE pn
+ , SRES.PATHWAYRELATIONSHIP prel
+ , SRES.ONTOLOGYTERM ot
+ , SRES.EXTERNALDATABASE ed
+ , SRES.EXTERNALDATABASERELEASE edr
+ WHERE p.PATHWAY_ID = prr.PATHWAY_ID
+ AND prx.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND prel.NODE_ID = pn.PATHWAY_NODE_ID
+ AND ot.NAME = 'molecular entity'
+ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
+ AND p.EXTERNAL_DATABASE_RELEASE_ID = edr.EXTERNAL_DATABASE_RELEASE_ID
+ AND edr.EXTERNAL_DATABASE_ID = ed.EXTERNAL_DATABASE_ID
+ ) t LEFT OUTER JOIN CHEBI.COMPOUNDS c on t.row_id = c.ID
+ UNION
+ SELECT
+ pathway_id
+ , reaction_id
+ , ext_db_name
+ , ext_db_version
+ , compound_node_id
+ , compound_source_id
+ , c.chebi_accession
+ , case when c.chebi_accession is not null then 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=' || c.chebi_accession else null end as chebi_url
+ , type
+ FROM (
+ SELECT
+ p.PATHWAY_ID
+ , prx.PATHWAY_REACTION_ID as reaction_id
+ , ed.NAME as ext_db_name
+ , edr.version as ext_db_version
+ , pn.pathway_node_id as compound_node_id
+ , pn.DISPLAY_LABEL as compound_source_id
+ , 'product' as type
+ , pn.row_id
+ FROM
+ APIDB.PATHWAYREACTION prx
+ , SRES.PATHWAY p
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYNODE pn
+ , SRES.PATHWAYRELATIONSHIP prel
+ , SRES.ONTOLOGYTERM ot
+ , SRES.EXTERNALDATABASE ed
+ , SRES.EXTERNALDATABASERELEASE edr
+ WHERE p.PATHWAY_ID = prr.PATHWAY_ID
+ AND prx.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND prel.ASSOCIATED_NODE_ID = pn.PATHWAY_NODE_ID
+ AND ot.NAME = 'molecular entity'
+ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
+ AND p.EXTERNAL_DATABASE_RELEASE_ID = edr.EXTERNAL_DATABASE_RELEASE_ID
+ AND edr.EXTERNAL_DATABASE_ID = ed.EXTERNAL_DATABASE_ID
+ ) t2 LEFT OUTER JOIN CHEBI.COMPOUNDS c on t2.row_id = c.ID
+ ;
+
+
diff --git a/Model/lib/psql/webready/global/PathwayCompounds_ix.psql b/Model/lib/psql/webready/global/PathwayCompounds_ix.psql
new file mode 100644
index 000000000..3aae4dbb5
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayCompounds_ix.psql
@@ -0,0 +1,3 @@
+ create index PthCmpd_id_ix
+ on :SCHEMA.PathwayCompounds (pathway_id, reaction_id, ext_db_name)
+ ;
diff --git a/Model/lib/psql/webready/global/PathwayEC.psql b/Model/lib/psql/webready/global/PathwayEC.psql
new file mode 100644
index 000000000..6a688da18
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayEC.psql
@@ -0,0 +1,31 @@
+ drop table if exists :SCHEMA.PathwayEC;
+
+ /* this table represents the universe of EC numbers that are associated with enzyme nodes in metabolic pathways*/
+
+ CREATE TABLE :SCHEMA.PathwayEC as
+
+ SELECT DISTINCT ec.enzyme_class_id -- use this for joining back to pathways later
+ , ec.ec_number -- useful for quick exact matches
+ , ec.ec_number_1 --have the 4 EC number components separately avoids lots of like statements later
+ , ec.ec_number_2
+ , ec.ec_number_3
+ , ec.ec_number_4
+ , regexp_count(ec.ec_number, '-') as wildcard_count -- how many of the enzyme number positions are unknown
+ , pn.pathway_id
+ , p.external_database_release_id
+
+ FROM sres.EnzymeClass ec
+ , sres.OntologyTerm ot
+ , sres.PathwayNode pn
+ , sres.Pathway p
+ -- find all pathway nodes representing enzymes
+
+ WHERE pn.pathway_node_type_id = ot.ontology_term_id
+ AND ot.name = 'enzyme'
+ AND p.pathway_id = pn.pathway_id
+ -- we don't want the root or this gets matched to everything!
+ AND pn.display_label != '-.-.-.-'
+ -- now we can get the enzyme details for the enzyme pathway nodes
+ AND ec.enzyme_class_id = pn.row_id
+ ;
+
diff --git a/Model/lib/psql/webready/global/PathwayEC_ix.psql b/Model/lib/psql/webready/global/PathwayEC_ix.psql
new file mode 100644
index 000000000..0c652289a
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayEC_ix.psql
@@ -0,0 +1,6 @@
+CREATE INDEX PathwayEC_1_idx ON :SCHEMA.PathwayEC (pathway_id, ec_number, external_database_release_id)
+ ;
+
+
+CREATE INDEX PathwayEC_2_idx ON :SCHEMA.PathwayEC (ec_number_1, ec_number_2, ec_number_3, ec_number_4, enzyme_class_id, ec_number, wildcard_count)
+ ;
diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql
new file mode 100644
index 000000000..bcd097928
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayNodes.psql
@@ -0,0 +1,304 @@
+ DROP TABLE IF EXISTS :SCHEMA.NodesWithTypes;
+
+ CREATE UNLOGGED TABLE :SCHEMA.NodesWithTypes AS
+ SELECT pn.pathway_id
+ , CASE WHEN pa.name IS NOT NULL THEN pa.name ELSE pn.display_label END AS display_label
+ , pa.url
+ , CASE WHEN pa.name IS NOT NULL THEN pa.name ELSE pn.display_label END AS name
+ , pa.source_id AS node_identifier
+ , pn.pathway_node_id AS pathway_node_id
+ , pn.x
+ , pn.y
+ , pn.width
+ , pn.height
+ , pn.cellular_location
+ , ot.name AS type
+ , NULL AS default_structure
+ FROM sres.pathwaynode pn
+ INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
+ LEFT JOIN :SCHEMA.PathwayAttributes pa ON pn.display_label = pa.source_id
+ WHERE ot.name = 'metabolic process'
+ UNION ALL
+ SELECT pn.pathway_id
+ , pn.display_label
+ , NULL AS url
+ , coalesce(ec.description, pn.display_label) AS name
+ , ec.ec_number AS node_identifier
+ , pn.pathway_node_id AS pathway_node_id
+ , pn.x
+ , pn.y
+ , pn.width
+ , pn.height
+ , pn.cellular_location
+ , ot.name AS type
+ , NULL AS default_structure
+ FROM sres.pathwaynode pn
+ INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
+ LEFT JOIN sres.enzymeclass ec ON pn.row_id = ec.enzyme_class_id
+ WHERE ot.name = 'enzyme'
+ GROUP BY pn.pathway_id
+ , pn.display_label
+ , ec.description
+ , ec.ec_number
+ , pn.pathway_node_id
+ , pn.x
+ , pn.y
+ , pn.width
+ , pn.height
+ , pn.cellular_location
+ , ot.name
+ UNION ALL
+ SELECT pn.pathway_id
+ , pn.display_label
+ , NULL AS url
+ , coalesce(c.name, pn.display_label) AS name
+ , c.chebi_accession AS node_identifier
+ , pn.pathway_node_id AS pathway_node_id
+ , pn.x
+ , pn.y
+ , pn.width
+ , pn.height
+ , pn.cellular_location
+ , ot.name AS type
+ , st.default_structure
+ FROM sres.pathwaynode pn
+ INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
+ LEFT JOIN chebi.compounds c ON pn.row_id = c.id
+ LEFT JOIN (
+ SELECT n.compound
+ , s.structure AS default_structure
+ FROM chebi.structures s
+ , (SELECT id
+ , compound
+ FROM :SCHEMA.CompoundId
+ WHERE type IN ('same ID', 'child ID')
+ ) n
+ WHERE n.id = 'CHEBI:' || s.compound_id
+ AND s.type = 'mol'
+ AND s.dimension = '2D'
+ AND s.default_structure = 'Y'
+ ) st ON c.chebi_accession = st.compound
+ WHERE ot.name = 'molecular entity'
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.ReactionsWithReversibility;
+
+ CREATE UNLOGGED TABLE :SCHEMA.ReactionsWithReversibility AS
+ SELECT DISTINCT spr.pathway_relationship_id
+ , tpr.is_reversible
+ , tpr.reaction_source_id
+ FROM sres.pathwayrelationship spr
+ , apidb.pathwayreactionrel prr
+ , :SCHEMA.PathwayReactions tpr
+ WHERE prr.pathway_relationship_id = spr.pathway_relationship_id
+ AND tpr.reaction_id = prr.pathway_reaction_id
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.EnzymeEdges;
+
+ CREATE UNLOGGED TABLE :SCHEMA.EnzymeEdges AS
+ SELECT DISTINCT nwt.pathway_id AS pathway_id
+ , nwt.pathway_node_id AS e_id
+ , nwt.type
+ , i.associated_node_id AS m1_id
+ , rri.is_reversible AS ir1
+ , o.node_id AS m2_id
+ , rro.is_reversible AS ir2
+ , i.associated_node_id || '_' || o.node_id || '_' || rri.is_reversible || '_' || rro.is_reversible as io
+ FROM sres.pathwayrelationship i
+ , sres.pathwayrelationship o
+ , :SCHEMA.NodesWithTypes nwt
+ , :SCHEMA.ReactionsWithReversibility rri
+ , :SCHEMA.ReactionsWithReversibility rro
+ WHERE i.node_id = nwt.pathway_node_id
+ AND o.associated_node_id = nwt.pathway_node_id
+ AND i.pathway_relationship_id = rri.pathway_relationship_id
+ AND o.pathway_relationship_id = rro.pathway_relationship_id
+ AND nwt.type = 'enzyme'
+ AND rri.is_reversible = rro.is_reversible
+ AND rri.reaction_source_id = rro.reaction_source_id
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.ParentNodes;
+
+ CREATE UNLOGGED TABLE :SCHEMA.ParentNodes AS
+ WITH AllEnzymeEdges AS (
+ SELECT string_agg(io, ',' ORDER BY io) AS all_edges
+ , e_id
+ , pathway_id
+ FROM :SCHEMA.EnzymeEdges
+ GROUP BY pathway_id
+ , e_id
+ )
+ , pn as (
+ SELECT pathway_id
+ , all_edges
+ , string_agg(e_id::varchar, '_' ORDER BY e_id) AS parent
+ FROM AllEnzymeEdges
+ GROUP BY pathway_id
+ , all_edges
+ HAVING COUNT (*) > 1
+ )
+ SELECT aee.e_id, pn.*
+ FROM pn
+ , AllEnzymeEdges aee
+ WHERE aee.all_edges = pn.all_edges
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.NodesWithParents;
+
+ CREATE UNLOGGED TABLE :SCHEMA.NodesWithParents AS
+ SELECT DISTINCT ee.e_id AS pathway_node_id
+ , pn.parent
+ , ee.type AS node_type
+ , ee.pathway_id
+ FROM :SCHEMA.EnzymeEdges ee
+ , :SCHEMA.ParentNodes pn
+ WHERE pn.pathway_id = ee.pathway_id
+ AND ee.e_id = pn.e_id
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.EnzymeReactions;
+
+ CREATE UNLOGGED TABLE :SCHEMA.EnzymeReactions AS
+ SELECT DISTINCT pn.PATHWAY_NODE_ID node_id
+ , pr.SOURCE_ID AS reaction_source_id
+ FROM sres.pathwaynode pn
+ , apidb.pathwayreaction pr
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYRELATIONSHIP prel
+ , sres.ontologyterm ot
+ WHERE (prel.NODE_ID = pn.PATHWAY_NODE_ID OR prel.ASSOCIATED_NODE_ID = pn.PATHWAY_NODE_ID)
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND ot.name = 'enzyme'
+ AND pn.PATHWAY_NODE_TYPE_ID = ot.ONTOLOGY_TERM_ID
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.ParentsForEdges;
+
+ CREATE UNLOGGED TABLE :SCHEMA.ParentsForEdges AS
+ SELECT ee.e_id
+ , ee.m1_id
+ , ee.ir1
+ , ee.m2_id
+ , ee.ir2
+ , np.parent
+ FROM :SCHEMA.EnzymeEdges ee
+ , :SCHEMA.NodesWithParents np
+ WHERE ee.e_id = np.pathway_node_id
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.PathwayEdges;
+
+ CREATE UNLOGGED TABLE :SCHEMA.PathwayEdges AS
+ SELECT pa.source_id
+ , pa.pathway_source
+ , rel.*
+ FROM (
+ SELECT DISTINCT ee.pathway_id
+ , coalesce(pe.parent, ee.e_id::varchar) AS source
+ , ee.m1_id::varchar AS target
+ , max(ee.ir1) AS is_reversible
+ FROM :SCHEMA.EnzymeEdges ee
+ LEFT JOIN :SCHEMA.ParentsForEdges pe ON ee.e_id = pe.e_id
+ GROUP BY ee.pathway_id, ee.m1_id, coalesce(pe.parent, ee.e_id::varchar)
+ UNION
+ SELECT DISTINCT ee.pathway_id
+ , ee.m2_id::varchar AS source
+ , coalesce(pe.parent, ee.e_id::varchar) AS target
+ , max(ee.ir2) AS is_reversible
+ FROM :SCHEMA.EnzymeEdges ee
+ LEFT JOIN :SCHEMA.ParentsForEdges pe ON ee.e_id = pe.e_id
+ GROUP BY ee.pathway_id, ee.m2_id, coalesce(pe.parent, ee.e_id::varchar)
+ UNION
+ SELECT pn1.pathway_id
+ , pr.node_id::varchar AS source
+ , pr.associated_node_id::varchar AS target
+ , pr.is_reversible
+ FROM sres.pathwayrelationship pr
+ , sres.pathwaynode pn1
+ , sres.pathwaynode pn2
+ , sres.ontologyterm ot1
+ , sres.ontologyterm ot2
+ WHERE pr.node_id = pn1.pathway_node_id
+ AND pr.associated_node_id = pn2.pathway_node_id
+ AND pn1.pathway_node_type_id = ot1.ontology_term_id
+ AND pn2.pathway_node_type_id = ot2.ontology_term_id
+ AND ot1.name != 'enzyme'
+ AND ot2.name != 'enzyme'
+ ) rel
+ , :SCHEMA.PathwayAttributes pa
+ WHERE pa.pathway_id = rel.pathway_id
+ ;
+
+ CREATE TABLE :SCHEMA.PathwayNodes AS
+ SELECT pa.source_id
+ , pa.pathway_source
+ , pn.display_label
+ , pn.x
+ , pn.y
+ , pn.width
+ , pn.height
+ , pn.cellular_location
+ , coalesce(pn.url,
+ CASE WHEN coalesce(type, nodes_with_parents.node_type) = 'enzyme' THEN
+ CASE
+ -- CHECK AND FIX
+ --WHEN REGEXP_LIKE (display_label, '^\d+\.(\d|-)+\.(\d|-)+\.(\d|-)+$') THEN 'https://enzyme.expasy.org/EC/' || display_label
+ WHEN pa.pathway_source = 'KEGG' THEN 'https://www.genome.jp/dbget-bin/www_bget?rn:' || reaction_source_id
+ WHEN pa.pathway_source = 'MetaCyc' THEN 'https://metacyc.org/META/new-image?type=REACTION' || chr(38) || 'object=' || reaction_source_id
+ WHEN pa.pathway_source = 'TrypanoCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/TRYPANO/new-image?type=REACTION' || chr(38) || 'object=' || reaction_source_id
+ WHEN pa.pathway_source = 'LeishCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/LEISH/new-image?tyrp=REACTION' || chr(38) || 'object=' || reaction_source_id
+ ELSE NULL END
+ ELSE NULL END) AS url
+ , pn.name
+ , pn.node_identifier
+ , nodes_with_parents.pathway_node_id AS id
+ , nodes_with_parents.parent
+ , reaction_source_id
+ , coalesce(type, nodes_with_parents.node_type) AS node_type
+ , pn.default_structure
+ FROM
+ ( SELECT e_id::varchar AS pathway_node_id
+ , nwp.parent
+ , type AS node_type
+ , ee.pathway_id
+ , er.reaction_source_id
+ FROM :SCHEMA.EnzymeEdges ee
+ INNER JOIN :SCHEMA.EnzymeReactions er ON er.node_id = ee.e_id
+ LEFT JOIN :SCHEMA.NodesWithParents nwp ON ee.e_id = nwp.pathway_node_id
+ UNION
+ SELECT nwp.parent
+ , NULL
+ , 'nodeOfNodes'
+ , pathway_id
+ , NULL
+ FROM :SCHEMA.NodesWithParents nwp
+ UNION
+ SELECT nwt.pathway_node_id::varchar AS pathway_node_id
+ , NULL AS parent
+ , nwt.type
+ , nwt.pathway_id
+ , NULL
+ FROM :SCHEMA.NodesWithTypes nwt
+ WHERE nwt.type != 'enzyme'
+ ) nodes_with_parents
+ INNER JOIN :SCHEMA.PathwayAttributes pa ON nodes_with_parents.pathway_id = pa.pathway_id
+ LEFT JOIN :SCHEMA.NodesWithTypes pn ON nodes_with_parents.pathway_node_id = pn.pathway_node_id::varchar
+ ;
+
+drop table if exists :SCHEMA.NodesWithTypes;
+drop table if exists :SCHEMA.ReactionsWithReversibility;
+drop table if exists :SCHEMA.EnzymeEdges;
+drop table if exists :SCHEMA.ParentNodes;
+drop table if exists :SCHEMA.NodesWithParents;
+drop table if exists :SCHEMA.EnzymeReactions;
+drop table if exists :SCHEMA.ParentsForEdges;
+
+
+
+
+
+
+
diff --git a/Model/lib/psql/webready/global/PathwayReactions.psql b/Model/lib/psql/webready/global/PathwayReactions.psql
new file mode 100644
index 000000000..93e6e626c
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayReactions.psql
@@ -0,0 +1,142 @@
+ drop table if exists :SCHEMA.PR_rep;
+
+ create unlogged table :SCHEMA.PR_rep as
+ SELECT DISTINCT
+ pr.PATHWAY_REACTION_ID as reaction_id
+ , pr.SOURCE_ID as reaction_source_id
+ , pn.DISPLAY_LABEL as enzyme
+ , coalesce(ca.compound_name, pc.compound_source_id) as compound
+ , prel.is_reversible as is_reversible_og
+ , last_value(prel.is_reversible) OVER (partition by pr.pathway_reaction_id ORDER BY prel.is_reversible ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) as is_reversible
+ , first_value(pc.type) over (partition by pr.pathway_reaction_id, pr.SOURCE_ID, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE, coalesce(pc.chebi_accession, pc.compound_source_id), coalesce(ca.compound_name, pc.compound_source_id) ORDER BY pc.pathway_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as type
+ FROM
+ sres.pathway p
+ , apidb.pathwayreaction pr
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYNODE pn
+ , SRES.PATHWAYRELATIONSHIP prel
+ , SRES.ONTOLOGYTERM ot
+ , :SCHEMA.PathwayCompounds pc
+ LEFT JOIN :SCHEMA.CompoundAttributes ca ON pc.chebi_accession = ca.source_id
+ WHERE p.PATHWAY_ID = prr.PATHWAY_ID
+ AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND prel.NODE_ID = pn.PATHWAY_NODE_ID
+ AND ot.name = 'enzyme'
+ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
+ AND pc.PATHWAY_ID = p.PATHWAY_ID
+ AND pc.REACTION_id = pr.PATHWAY_REACTION_ID
+ ;
+
+
+ drop table if exists :SCHEMA.PR_t1;
+
+ create unlogged table :SCHEMA.PR_t1 as
+ SELECT DISTINCT
+ pr.PATHWAY_REACTION_ID as reaction_id
+ , pr.SOURCE_ID as reaction_source_id
+ , ed.NAME as ext_db_name
+ , edr.VERSION as ext_db_version
+ , cast(pn.DISPLAY_LABEL as varchar(20)) as enzyme
+ , min(PR_rep.is_reversible) as is_reversible
+ , min(PR_rep.type) as type
+ , string_agg (pc.type, ',' order by p.pathway_id) as type_list
+ , coalesce(ca.compound_name, pc.compound_source_id) as compound
+ , CASE
+ WHEN coalesce(pc.CHEBI_ACCESSION, pc.compound_source_id) LIKE 'CHEBI%'
+ THEN '' || coalesce(ca.compound_name, pc.compound_source_id) || ''
+ ELSE coalesce(pc.chebi_accession, pc.compound_source_id)
+ END as compound_url
+ , CASE (replace (replace (ed.name, 'Pathways_', ''), '_RSRC', ''))
+ WHEN 'KEGG' THEN 'https://www.genome.jp/dbget-bin/www_bget?rn:' || pr.source_id
+ WHEN 'MetaCyc' THEN 'https://metacyc.org/META/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id
+ WHEN 'TrypanoCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/TRYPANO/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id
+ WHEN 'LeishCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/LEISH/new-image?tyrp=REACTION' || chr(38) || 'object=' || pr.source_id
+ WHEN 'FungiCyc' THEN NULL
+ END as reaction_url
+ FROM
+ sres.pathway p
+ , apidb.pathwayreaction pr
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYNODE pn
+ , SRES.PATHWAYRELATIONSHIP prel
+ , SRES.EXTERNALDATABASE ed
+ , SRES.EXTERNALDATABASERELEASE edr
+ , SRES.ONTOLOGYTERM ot
+ , :SCHEMA.pr_rep pr_rep
+ , :SCHEMA.PathwayCompounds pc
+ LEFT JOIN :SCHEMA.CompoundAttributes ca ON pc.chebi_accession = ca.source_id
+ WHERE p.PATHWAY_ID = prr.PATHWAY_ID
+ AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND prel.NODE_ID = pn.PATHWAY_NODE_ID
+ AND ot.name = 'enzyme'
+ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
+ AND pc.EXT_DB_NAME = ed.NAME
+ AND pc.EXT_DB_VERSION = edr.VERSION
+ AND ed.EXTERNAL_DATABASE_ID = edr.EXTERNAL_DATABASE_ID
+ AND pc.PATHWAY_ID = p.PATHWAY_ID
+ AND pc.REACTION_id = pr.PATHWAY_REACTION_ID
+ AND PR_rep.reaction_id = pr.pathway_reaction_id
+ AND PR_rep.reaction_source_id = pr.source_id
+ AND PR_rep.compound = coalesce(ca.compound_name, pc.compound_source_id)
+ AND PR_rep.enzyme = pn.DISPLAY_LABEL
+ AND PR_rep.is_reversible_og = prel.is_reversible
+ GROUP BY pr.pathway_reaction_id, pr.SOURCE_ID, ed.NAME, edr.VERSION, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE
+ , coalesce(pc.chebi_accession, pc.compound_source_id)
+ , coalesce(ca.compound_name, pc.compound_source_id)
+ ;
+
+
+ CREATE TABLE :SCHEMA.PathwayReactions AS
+ SELECT o.*
+ , CASE WHEN o.expasy_url IS NOT NULL THEN '' || o.enzyme || '' ELSE o.enzyme END as expasy_html
+ FROM (
+ SELECT i.*
+ , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-'
+ THEN
+ 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1='
+ || ec.ec_number_1
+ || CASE ec.ec_number_2 WHEN null THEN null ELSE chr(38) || 'field2=' || ec.ec_number_2 END
+ || CASE ec.ec_number_3 WHEN null THEN null ELSE chr(38) || 'field3=' || ec.ec_number_3 END
+ || CASE ec.ec_number_4 WHEN null THEN null ELSE chr(38) || 'field4=' || ec.ec_number_4 END
+ ELSE reaction_url END as expasy_url
+ , ec.description as enzyme_description
+ FROM (
+ SELECT
+ reaction_id
+ , reaction_source_id
+ , reaction_url
+ , ext_db_name
+ , ext_db_version
+ , enzyme
+ , substrates_html || ' ' || sign || ' ' || products_html as equation_html
+ , substrates_text || ' ' || sign || ' ' || products_text as equation_text
+ , case when sign = '<=>' then 1 else 0 end as is_reversible
+ , substrates_text
+ , products_text
+ FROM (
+ SELECT
+ reaction_id
+ , reaction_source_id
+ , reaction_url
+ , ext_db_name
+ , ext_db_version
+ , enzyme
+ , (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like '%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign
+ , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html
+ , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text
+ , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html
+ , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text
+ FROM :SCHEMA.PR_t1 as t1
+ GROUP BY reaction_id, reaction_source_id, reaction_url, ext_db_name, ext_db_version, enzyme, is_reversible
+ ) t2
+ ) i
+ LEFT OUTER JOIN sres.enzymeclass ec ON i.enzyme = ec.ec_number
+ ) o
+ ;
+
+
+ drop table if exists :SCHEMA.PR_rep;
+ drop table if exists :SCHEMA.PR_t1;
diff --git a/Model/lib/psql/webready/global/PathwayReactions_ix.psql b/Model/lib/psql/webready/global/PathwayReactions_ix.psql
new file mode 100644
index 000000000..50dd09d63
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayReactions_ix.psql
@@ -0,0 +1,3 @@
+ create index PathRcts_id_ix
+ on :SCHEMA.PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name)
+ ;
diff --git a/Model/lib/psql/webready/keep/DatasetExampleSourceId.psql b/Model/lib/psql/webready/keep/DatasetExampleSourceId.psql
new file mode 100644
index 000000000..4e0137173
--- /dev/null
+++ b/Model/lib/psql/webready/keep/DatasetExampleSourceId.psql
@@ -0,0 +1,22 @@
+:CREATE_AND_POPULATE
+ WITH profiles AS (
+ SELECT p.source_id,
+ ga.sequence_id,
+ d.name,
+ row_number() over(partition by d.name
+ order by ga.chromosome_order_num, p.profile_as_string desc) as rn
+ FROM :SCHEMA.Profile p
+ INNER JOIN sres.ExternalDatabase d ON p.dataset_name = d.name
+ LEFT JOIN :SCHEMA.GeneAttributes ga ON p.source_id = ga.source_id
+ WHERE p.profile_as_string is not null
+ and ga.org_abbrev = ':ORG_ABBREV'
+ and p.org_abbrev = ':ORG_ABBREV'
+ )
+ SELECT
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ p.source_id as example_source_id, p.sequence_id, p.name as dataset
+ FROM profiles p
+ WHERE p.rn = 1
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/keep/DatasetExampleSourceId_ix.psql b/Model/lib/psql/webready/keep/DatasetExampleSourceId_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/keep/OrganismAbbreviationBlast.psql b/Model/lib/psql/webready/keep/OrganismAbbreviationBlast.psql
new file mode 100644
index 000000000..faf228674
--- /dev/null
+++ b/Model/lib/psql/webready/keep/OrganismAbbreviationBlast.psql
@@ -0,0 +1,40 @@
+:CREATE_AND_POPULATE
+ select sub.*, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, CURRENT_TIMESTAMP as modification_date
+ from (
+ SELECT organism, parent, abbreviation
+ FROM OrganismAbbreviationWS
+ UNION
+ -- all familes for popsets
+ SELECT DISTINCT family_name_for_files || ' Popset/Genbank Isolates' as organism, '' as parent,
+ family_name_for_files as abbreviation
+ FROM apidb.Organism
+ WHERE family_name_for_files is not null
+ AND abbrev || '_isolates_genbank_RSRC' IN (SELECT external_db_name as db_name FROM PopsetAttributes)
+ AND family_name_for_files NOT IN ('Culicosporidae', 'Dubosqiidae', 'Ordosporidae')
+ UNION
+ SELECT special.organism, special.parent, special.abbreviation
+ FROM OrganismTree ot,
+ ( -- all species and speciesAbbreviations from apidb.Organism where we have ests
+ SELECT DISTINCT
+ sp.name as organism, ot.parentTerm as parent,
+ regexp_replace(org.name_for_filenames, replace(org.strain_abbrev, '/','_'),'') as abbreviation
+ FROM sres.TaxonName sp, TaxonSpecies ts, apidb.Organism org, OrganismTree ot
+ WHERE org.taxon_id = ts.taxon_id
+ AND ts.species_taxon_id = sp.taxon_id
+ AND sp.name_class = 'scientific name'
+ AND ot.term = sp.name
+ AND org.strain_abbrev is not null
+ AND org.name_for_filenames is not null
+ AND sp.taxon_id
+ in (SELECT etn.taxon_id
+ FROM sres.TaxonName etn
+ WHERE etn.name in (SELECT organism FROM EstAttributes))
+ UNION
+ SELECT 'Cryptosporidiidae SSU_18srRNA Reference Isolates' as organism,
+ 'Cryptosporidium' as parent, 'CryptosporidiidaeReference' as abbreviation
+ ) special
+ WHERE special.parent = ot.term
+ ) sub
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/keep/OrganismAbbreviationBlast_ix.psql b/Model/lib/psql/webready/keep/OrganismAbbreviationBlast_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/keep/OrganismAttributes.psql b/Model/lib/psql/webready/keep/OrganismAttributes.psql
new file mode 100644
index 000000000..bda5e8a2b
--- /dev/null
+++ b/Model/lib/psql/webready/keep/OrganismAttributes.psql
@@ -0,0 +1,340 @@
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVDataSourceCount AS
+ SELECT
+ taxon_id,
+ max(CASE WHEN stype = 'organellar' THEN num ELSE null END) as organellar_has,
+ max(CASE WHEN stype = 'Epitope' THEN num ELSE null END) as Epitope_has,
+ max(CASE WHEN stype = 'Array' THEN num ELSE null END) as Array_has,
+ max(CASE WHEN stype = 'HTSIsolate' THEN num ELSE null END) as HTSIsolate_has,
+ max(CASE WHEN stype = 'Popset' THEN num ELSE null END) as Popset_has
+ FROM (
+ SELECT DISTINCT enas.taxon_id, 'organellar' AS stype, 1 AS num
+ FROM dots.externalNAsequence enas, SRES.ontologyterm ot
+ WHERE enas.sequence_ontology_id = ot.ontology_term_id
+ AND ot.name in( 'mitochondrial_chromosome','apicoplast_chromosome')
+ and enas.taxon_id = :TAXON_ID
+ GROUP BY enas.taxon_id
+ UNION
+ SELECT distinct ds.taxon_id, 'HTSIsolate' AS stype, 1 AS num
+ FROM apidb.DataSource ds
+ WHERE ds.type = 'isolates' AND ds.subtype = 'HTS_SNP'
+ AND ds.taxon_id = :TAXON_ID
+ GROUP BY taxon_id
+ UNION
+ SELECT distinct ds.taxon_id, 'Popset' AS stype, 1 AS num
+ FROM apidb.DataSource ds
+ WHERE ds.subtype = 'sequenceing_types'
+ AND ds.taxon_id = :TAXON_ID
+ GROUP BY taxon_id
+ UNION
+ SELECT distinct ds.taxon_id, 'Epitope' AS stype, 1 AS num
+ FROM apidb.DataSource ds
+ WHERE ds.type = 'epitope'
+ AND ds.taxon_id = :TAXON_ID
+ GROUP BY taxon_id
+ UNION
+ SELECT distinct ds.taxon_id, 'Array' AS stype, 1 AS num
+ FROM apidb.DataSource ds
+ WHERE ds.type = 'transcript_expression'
+ AND ds.subtype = 'array'
+ AND ds.taxon_id = :TAXON_ID
+ GROUP BY taxon_id
+ ) t
+ GROUP BY taxon_id
+
+ ;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVOrganismCentromere AS
+ SELECT distinct s.taxon_id,
+ case when count(*) > 0 then 1 else 0 end as hasCentromere
+ FROM DOTS.MISCELLANEOUS f
+ , sres.ontologyTerm ot
+ , dots.nasequence s
+ WHERE ot.ontology_term_id = f.sequence_ontology_id
+ AND ot.name='centromere'
+ AND f.na_sequence_id = s.na_sequence_id
+ AND s.taxon_id = :TAXON_ID
+ GROUP BY s.taxon_id
+
+ ;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVSequenceCount AS
+ SELECT
+ taxon_id,
+ max(CASE WHEN sequence_type = 'contig' THEN num ELSE null END) as contig_num,
+ max(CASE WHEN sequence_type = 'supercontig' THEN num ELSE null END) as supercont_num,
+ max(CASE WHEN sequence_type = 'chromosome' THEN num ELSE null END) as chrom_num
+ FROM (
+ SELECT count(*) as num, sequence_type, taxon_id
+ FROM :SCHEMA.GenomicSeqAttributes
+ WHERE is_top_level =1
+ and org_abbrev = :ORG_ABBREV
+ GROUP BY taxon_id, sequence_type
+ ) t
+ GROUP BY taxon_id
+
+ ;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVCommunityCount AS
+ -- SELECT taxon_id, count(*) as communityCount
+ -- TODO: addd this back
+ select taxon_id, 0 as communityCount
+ FROM :SCHEMA.GeneAttributes
+ where org_abbrev = ':ORG_ABBREV'
+ --WHERE
+ --(source_id, project_id) IN (
+ --SELECT distinct stable_id, project_name
+ --userlogins5.mappedComment@prodn.login_comment
+ --FROM userlogins5.mappedComment
+ --WHERE is_visible = 1
+ --AND comment_target_id = 'gene'
+ --)
+ GROUP BY taxon_id
+
+ ;
+
+
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProfileCount AS
+ SELECT ga.taxon_id,
+ count(distinct(case when p.dataset_type = 'transcript_expression'
+ and p.dataset_subtype like '%rt_pcr%'
+ and ga.is_deprecated = 0
+ then p.source_id
+ else ''
+ end)) as rtPCRCount,
+ count(distinct(case when p.dataset_type = 'transcript_expression'
+ and p.dataset_subtype = 'rnaseq'
+ and ga.is_deprecated = 0
+ then p.source_id
+ else ''
+ end)) as rnaSeqCount,
+ count(distinct(case when p.dataset_type = 'transcript_expression'
+ and p.dataset_subtype = 'array'
+ and ga.is_deprecated = 0
+ then p.source_id
+ else ''
+ end)) as geneArrayCount
+ FROM :SCHEMA.Profile p
+ RIGHT OUTER JOIN :SCHEMA.GeneAttributes ga ON ga.source_id = p.source_id
+ WHERE ga.org_abbrev = ':ORG_ABBREV'
+ and p.org_abbrev = ':ORG_ABBREV'
+ GROUP BY ga.taxon_id
+
+ ;
+
+
+
+ -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVPopsetCount AS
+ -- SELECT count(distinct gene.source_id) as popsetCount, sim.taxon_id
+ -- FROM (
+ -- (SELECT i.source_id, nas.taxon_id, nas.source_id as sequence_source_id
+ -- FROM dots.similarity s, PopsetAttributes i,
+ -- core.tableinfo t, dots.nasequence nas
+ -- WHERE s.query_id = i.na_sequence_id
+ -- AND nas.na_sequence_id = s.subject_id
+ -- AND t.table_id = s.subject_table_id
+ -- AND t.table_id = s.query_table_id
+ -- AND t.name = 'ExternalNASequence'
+ -- AND s.pvalue_exp <= -10
+ -- and nas.taxon_id = :TAXON_ID
+ -- ) sim LEFT JOIN
+ -- (SELECT i.source_id, seq.source_id as sequence_id
+ -- FROM dots.similarity s, PopsetAttributes i, GeneAttributes g,
+ -- core.tableinfo t, dots.nasequence seq
+ -- WHERE s.query_id = i.na_sequence_id
+ -- AND s.subject_id = g.na_sequence_id
+ -- AND t.table_id = s.subject_table_id
+ -- AND t.table_id = s.query_table_id
+ -- AND s.min_subject_start <= g.end_max
+ -- AND s.max_subject_end >= g.start_min
+ -- AND g.na_sequence_id = seq.na_sequence_id
+ -- AND t.name = 'ExternalNASequence'
+ -- and seq.taxon_id = :TAXON_ID
+ -- ) gene
+ -- ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id)
+ -- GROUP BY sim.taxon_id
+
+ -- ;
+
+
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGeneCount AS
+ SELECT genomestat.taxon_id,
+ genomestat.project_id,
+ genomestat.database_version,
+ genomestat.ncbi_tax_id,
+ genomestat.Megabps,
+ -- coalesce(snpCount.ct,0) as snpCount,
+ coalesce(count(distinct ga.source_id),0) as geneCount,
+ coalesce(count(distinct case when ga.is_pseudo =1 then ga.source_id else '' end),0) as pseudoGeneCount,
+ coalesce(count(distinct case when (ga.gene_type ='protein coding' or ga.gene_type ='protein coding gene') then ga.source_id else '' end),0) as codingGeneCount,
+ coalesce(count(distinct case when (ga.gene_type ='protein coding' or ga.gene_type ='protein coding gene') then '' else ga.source_id end),0) as otherGeneCount,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then cct.gene_source_id
+ else NULL
+ end)),0) ChipChipGeneCount ,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then pp.source_id
+ else NULL
+ end)),0) orthologCount,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then gts.gene_source_id
+ else NULL
+ end)),0) goCount,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then tfbs.gene_source_id
+ else NULL
+ end)),0) tfbsCount,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then mss.aa_sequence_id
+ else NULL
+ end)),0) proteomicsCount,
+ -- coalesce(count (distinct (case when ga.is_deprecated = 0
+ -- then est.source_id
+ -- else NULL
+ -- end)),0) estCount,
+ coalesce(count (distinct (case when (ga.is_deprecated = 0 and ta.ec_numbers is not null)
+ then ga.source_id
+ else NULL
+ end)),0) ecNumberCount
+ FROM :SCHEMA.GeneAttributes ga
+ LEFT OUTER JOIN apidb.phylogeneticprofile pp on ga.source_id = pp.source_id and ga.org_abbrev = ':ORG_ABBREV'
+ LEFT OUTER JOIN :SCHEMA.gotermsummary gts on ga.source_id = gts.gene_source_id and gts.org_abbrev = ':ORG_ABBREV'
+ LEFT OUTER JOIN :SCHEMA.TFBSGene tfbs on ga.source_id = tfbs.gene_source_id and tfbs.org_abbrev = ':ORG_ABBREV'
+ LEFT OUTER JOIN :SCHEMA.TranscriptAttributes ta on ta.gene_source_id = ga.source_id and ta.org_abbrev = ':ORG_ABBREV'
+ LEFT OUTER JOIN apidb.MassSpecSummary mss on ta.aa_sequence_id = mss.aa_sequence_id
+ LEFT OUTER JOIN :SCHEMA.chipchipTranscript cct on ga.source_id = cct.gene_source_id and cct.org_abbrev = ':ORG_ABBREV'
+ -- LEFT OUTER JOIN (
+ -- SELECT distinct s.gene as source_id
+ -- FROM EstAlignmentGeneSummary s, EstAttributes e
+ -- WHERE s.est_gene_overlap_length >= 100
+ -- AND s.is_best_alignment in (1)
+ -- AND s.percent_est_bases_aligned >= 20
+ -- AND s.percent_identity >= 90
+ -- AND e.best_alignment_count <= 1
+ -- AND e.source_id = s.accession
+ -- GROUP by s.gene HAVING count(*) >= 1
+ -- ) est ON ga.source_id = est.source_id
+ RIGHT OUTER JOIN (
+ SELECT project_id, taxon_id,
+ max(database_version) as database_version,
+ CASE WHEN ncbi_tax_id > 9000000000 THEN NULL
+ ELSE ncbi_tax_id
+ END ncbi_tax_id,
+ to_char(sum(length)/1000000,'9999.99') as megabps
+ FROM :SCHEMA.GenomicSeqAttributes
+ WHERE is_top_level = 1
+ AND org_abbrev = ':ORG_ABBREV'
+ GROUP BY project_ID, taxon_id, ncbi_tax_id
+ ) genomestat ON genomestat.taxon_id = ga.taxon_id
+ -- LEFT OUTER JOIN (
+ -- SELECT count(distinct ga.source_id) as ct, ga.taxon_id
+ -- FROM GeneAttributes ga, SnpAttributes sf
+ -- WHERE sf.gene_source_id = ga.source_id
+ -- AND ga.is_deprecated = 0
+ -- GROUP BY ga.taxon_id
+ -- ) snpCount ON ga.taxon_id = snpCount.taxon_id
+ GROUP BY genomestat.taxon_id,
+ genomestat.project_id,
+ genomestat.database_version,
+ genomestat.ncbi_tax_id,
+ genomestat.Megabps
+ -- snpCount.ct
+
+ ;
+
+:CREATE_AND_POPULATE
+ SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ oa.*, tn2.name as species, t.ncbi_tax_id as species_ncbi_tax_id
+ , CASE WHEN ltrim(replace(oa.organism_name, tn2.name, ''))= oa.organism_name
+ THEN strain_abbrev
+ ELSE ltrim(replace(oa.organism_name, tn2.name, '')) END AS strain
+ FROM (
+ SELECT o.project_name as project_id,
+ case when t.ncbi_tax_id > 10000000
+ -- then 'TMPTX_' || round(t.ncbi_tax_id / 10000000) || '_' ||
+ -- mod(t.ncbi_tax_id, 10000000) -- e.g. "TMPTX_930_1"
+ -- then 'TMPTX_' || t.ncbi_tax_id -- all the many digits
+ then 'TMPTX_' || o.public_abbrev
+ else 'NCBITAXON_' || t.ncbi_tax_id
+ end as source_id,
+ o.abbrev as internal_abbrev,
+ o.public_abbrev,
+ o.orthomcl_abbrev,
+ o.family_name_for_files,
+ tn.name as organism_name,
+ o.genome_source,
+ o.strain_abbrev,
+ o.is_annotated_genome,
+ o.is_reference_strain,
+ o.is_family_representative,
+ o.name_for_filenames,
+ o.taxon_id as component_taxon_id,
+ gc.database_version,
+ gc.megabps as megabps,
+ gc.ncbi_tax_id as ncbi_tax_id,
+ gc.snpCount as snpCount,
+ gc.geneCount as geneCount,
+ gc.pseudoGeneCount as pseudoGeneCount,
+ gc.codingGeneCount as codingGeneCount,
+ gc.otherGeneCount as otherGeneCount,
+ gc.ChipChipGeneCount as ChipChipGeneCount,
+ gc.orthologCount as orthologCount,
+ gc.goCount as goCount,
+ gc.tfbsCount as tfbsCount,
+ gc.proteomicsCount as proteomicsCount,
+ gc.estCount as estCount,
+ gc.ecNumberCount as ecNumberCount,
+ cast(coalesce(dsc.Organellar_Has, 0) as NUMERIC(1)) as isOrganellar,
+ cast(coalesce(dsc.HTSIsolate_Has, 0) as NUMERIC(1)) as hasHTSIsolate,
+ cast(coalesce(dsc.Popset_Has, 0) as NUMERIC(1)) as hasPopset,
+ cast(coalesce(dsc.Epitope_Has, 0) as NUMERIC(1)) as hasEpitope,
+ cast(coalesce(dsc.Array_Has, 0) as NUMERIC(1)) as hasArray,
+ coalesce(oc.hasCentromere, 0) as hasCentromere,
+ coalesce(sc.contig_num, 0) as contigCount,
+ coalesce(sc.supercont_num, 0) as supercontigCount,
+ coalesce(sc.chrom_num, 0) as chromosomeCount,
+ coalesce(cc.communityCount, 0) as communityCount,
+ --coalesce(psc.popsetCount, 0) as popsetCount,
+ coalesce(pc.geneArrayCount, 0) as arrayGeneCount,
+ coalesce(pc.rnaSeqCount, 0) as rnaSeqCount,
+ coalesce(pc.rtPCRCount, 0) as rtPCRCount,
+ coalesce(ta.avg_transcript_length, 0) as avg_transcript_length
+ FROM apidb.Organism o
+ INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id and o.taxon_id = :TAXON_ID
+ INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVDataSourceCount dsc ON o.taxon_id = dsc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVOrganismCentromere oc ON o.taxon_id = oc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVSequenceCount sc ON o.taxon_id = sc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVCommunityCount cc ON o.taxon_id = cc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVGeneCount gc ON o.taxon_id = gc.taxon_id
+ --LEFT JOIN :SCHEMA.:ORG_ABBREVpopsetCount psc ON o.taxon_id = psc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVprofileCount pc ON o.taxon_id = pc.taxon_id
+ LEFT JOIN (
+ SELECT taxon_id, round(avg(length),1) as avg_transcript_length
+ FROM :SCHEMA.TranscriptAttributes
+ where org_abbrev = ':ORG_ABBREV'
+ GROUP by taxon_id
+ ) ta ON o.taxon_id = ta.taxon_id
+ WHERE tn.name_class = 'scientific name'
+ ) oa,
+ :SCHEMA.TaxonSpecies ts,
+ sres.taxon t,
+ sres.taxonname tn2
+ WHERE oa.component_taxon_id = ts.taxon_id
+ AND ts.species_taxon_id = t.taxon_id
+ AND ts.species_taxon_id = tn2.taxon_id
+ AND tn2.name_class = 'scientific name'
+ AND o.taxon_id = :TAXON_ID
+ AND ts.org_abbrev = ':ORG_ABBREV'
+
+
+:DECLARE_PARTITION;
+
+drop table :SCHEMA.:ORG_ABBREVDataSourceCount;
+drop table :SCHEMA.:ORG_ABBREVOrganismCentromere;
+drop table :SCHEMA.:ORG_ABBREVProfileCount;
+--drop table :SCHEMA.:ORG_ABBREVPopsetCount;
+drop table :SCHEMA.:ORG_ABBREVGeneCount;
+drop table :SCHEMA.:ORG_ABBREVSequenceCount;
+drop table :SCHEMA.:ORG_ABBREVCommunityCount;
diff --git a/Model/lib/psql/webready/keep/OrganismAttributes_ix.psql b/Model/lib/psql/webready/keep/OrganismAttributes_ix.psql
new file mode 100644
index 000000000..d1f420c0d
--- /dev/null
+++ b/Model/lib/psql/webready/keep/OrganismAttributes_ix.psql
@@ -0,0 +1,3 @@
+create unique Organism_sourceId_idx ON :SCHEMA.OrganismAttributes (source_id)
+ ;
+
diff --git a/Model/lib/psql/webready/keep/Profile.psql b/Model/lib/psql/webready/keep/Profile.psql
new file mode 100644
index 000000000..7db9b02f9
--- /dev/null
+++ b/Model/lib/psql/webready/keep/Profile.psql
@@ -0,0 +1,800 @@
+:CREATE_AND_POPULATE
+
+
+ CREATE TABLE Profile (
+ DATASET_NAME VARCHAR(200),
+ DATASET_TYPE VARCHAR(50),
+ DATASET_SUBTYPE VARCHAR(50),
+ PROFILE_TYPE VARCHAR(30),
+ NODE_TYPE VARCHAR(100),
+ SOURCE_ID VARCHAR(500),
+ PROFILE_STUDY_ID NUMERIC(7),
+ PROFILE_SET_NAME VARCHAR(400),
+ PROFILE_SET_SUFFIX VARCHAR(50),
+ PROFILE_AS_STRING VARCHAR(4000),
+ MAX_VALUE NUMERIC,
+ MIN_VALUE NUMERIC,
+ MAX_TIMEPOINT VARCHAR(200),
+ MIN_TIMEPOINT VARCHAR(200)
+ )
+
+
+:DECLARE_PARTITION;
+
+
+
+ DO $$
+ DECLARE
+ ctrows NUMERIC := 0;
+ commit_after NUMERIC := 10000;
+ pf_rows record;
+ BEGIN
+ FOR pf_rows IN (
+ SELECT
+ ds.name as dataset_name, ds.type as dataset_type,
+ ds.subtype as dataset_subtype, profile.profile_type, profile.node_type,
+ profile.source_id, profile.node_set_id as profile_study_id,
+ ps.name as profile_set_name,
+ cast(case
+ when regexp_replace(ps.name, '\[.+\]', '') like '% - %'
+ then regexp_replace(regexp_replace(ps.name, ' *\[.+\]', ''), '.+ - ', '')
+ -- special cases for legacy datasets
+ when regexp_replace(ps.name, '\[.+\]', '') like 'DeRisi%'
+ then regexp_replace(regexp_replace(ps.name, '\[.+\]', ''), 'DeRisi ', '')
+ when regexp_replace(ps.name, '\[.+\]', '') like 'winzeler_cc_%'
+ then regexp_replace(regexp_replace(ps.name, '\[.+\]', ''), 'winzeler_cc_', '')
+ when regexp_replace(ps.name, '\[.+\]', '') like 'Llinas RT transcription and decay %'
+ then regexp_replace(regexp_replace(ps.name, '\[.+\]', ''), 'Llinas RT transcription and decay ', '')
+ when regexp_replace(ps.name, '\[.+\]', '') like 'T.brucei paired end RNA-Seqdata from Horn%'
+ then regexp_replace(
+ regexp_replace(
+ regexp_replace(
+ regexp_replace(
+ regexp_replace(ps.name, '\[.+ nonunique\]', ''),
+ '\[.+ unique\]', ' - unique'),
+ '\[.+\]', ''),
+ 'aligned with cds coordinates ', 'cds coordinates'),
+ 'T.brucei paired end RNA-Seqdata from Horn ', '')
+ else null
+ end as varchar(50)
+ ) as profile_set_suffix,
+ CASE WHEN replace(profile.profile_as_string, 'NA' || CHR(9), '') = 'NA' THEN null ELSE profile.profile_as_string END as profile_as_string,
+ profile.max_value, profile.min_value,
+ profile.max_timepoint, profile.min_timepoint
+ FROM apidb.DataSource ds, sres.ExternalDatabase d,
+ sres.ExternalDatabaseRelease r, study.NodeSet ps,
+ (
+ -- gene profiles
+ SELECT gene_profile.node_set_id, ga.source_id, gene_profile.profile_type, gene_profile.node_type,
+ gene_profile.profile_as_string, gene_profile.max_value, gene_profile.min_value,
+ gene_profile.max_timepoint, gene_profile.min_timepoint
+ FROM GeneAttributes ga,
+ (
+ SELECT * FROM (
+ WITH result AS (
+ SELECT na_feature_id, protocol_app_node_id, max(value) as value
+ FROM results.NAFEATUREHOSTRESPONSE
+ GROUP BY na_feature_id, protocol_app_node_id
+ ), min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.value) over w1 as max_value,
+ first_value(result.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ from study.protocolappnode pan, study.NodeNodeSet sl, study.NodeSet s, result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, min_max, result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t1
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.mean_phenotype) over w1 as max_value,
+ first_value(result.mean_phenotype) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, apidb.CrisprPhenotype result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.mean_phenotype ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.mean_phenotype::numeric, 2)::varchar,'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.CrisprPhenotype result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND result.na_feature_id = min_max.na_feature_id
+ AND sl.node_set_id = min_max.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t2
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.score) over w1 as max_value,
+ first_value(result.score) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeScore result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.score ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.score::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeScore result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t3
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.relative_growth_rate) over w1 as max_value,
+ first_value(result.relative_growth_rate) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeGrowthRate result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.relative_growth_rate ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.relative_growth_rate::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeGrowthRate result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t4
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.value) over w1 as max_value,
+ first_value(result.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t5
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.percentile_channel1) over w1 as max_value,
+ first_value(result.percentile_channel1) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.percentile_channel1 ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'channel1_percentiles' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.percentile_channel1::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t6
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.percentile_channel2) over w1 as max_value,
+ first_value(result.percentile_channel2) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.percentile_channel2 ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'channel2_percentiles' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.percentile_channel2::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t7
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.standard_error) over w1 as max_value,
+ first_value(result.standard_error) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.standard_error ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'standard_error' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.standard_error::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t8
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.pvalue) over w1 as max_value,
+ first_value(result.pvalue) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.pvalue ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'pvalue' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.pvalue::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t9
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.amplitude) over w1 as max_value,
+ first_value(result.amplitude) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.amplitude ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'amplitude' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.amplitude::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t10
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.period) over w1 as max_value,
+ first_value(result.period) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.period ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'period' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.period::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t11
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.probability_mean) over w1 as max_value,
+ first_value(result.probability_mean) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.probability_mean ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'probability_mean' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.probability_mean::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t12
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.sd) over w1 as max_value,
+ first_value(result.sd) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.sd ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'sd' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.sd::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t13
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.lower_CI) over w1 as max_value,
+ first_value(result.lower_CI) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.lower_CI ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'lower_CI' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.lower_CI::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t14
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.upper_CI) over w1 as max_value,
+ first_value(result.upper_CI) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.upper_CI ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'upper_CI' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.upper_CI::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t15
+-- TO FIX
+-- UNION ALL
+-- SELECT * FROM (
+-- WITH min_max AS (
+-- SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+-- null as max_value,
+-- null as min_value,
+-- last_value(pan.name) over w1 as max_timepoint,
+-- first_value(pan.name) over w1 as min_timepoint
+-- FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.HaplotypeResult result
+-- WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+-- AND result.protocol_app_node_id = pan.protocol_app_node_id
+-- AND sl.node_set_id = s.node_set_id
+-- WINDOW w1 AS (
+-- PARTITION BY sl.node_set_id, result.na_feature_id
+-- ORDER BY result.value ASC
+-- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+-- )
+-- )
+-- SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+-- string_agg(coalesce(result.value, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+-- min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+-- FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.HaplotypeResult result, min_max
+-- WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+-- AND result.protocol_app_node_id = pan.protocol_app_node_id
+-- AND sl.node_set_id = s.node_set_id
+-- AND min_max.na_feature_id = result.na_feature_id
+-- AND min_max.node_set_id = sl.node_set_id
+-- GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+-- ) t16
+ ) gene_profile
+ WHERE ga.na_feature_id = gene_profile.na_feature_id
+ UNION ALL
+ -- compound profiles
+ SELECT compound_profile.node_set_id,
+ case
+ WHEN compound_profile.isotopomer IS NOT NULL
+ THEN ca.source_id || '|' || compound_profile.isotopomer
+ WHEN compound_profile.mass IS NOT NULL
+ THEN ca.source_id || '|' || compound_profile.mass || '|' || compound_profile.retention_time
+ ELSE ca.source_id
+ END as source_id,
+ compound_profile.profile_type, compound_profile.node_type,
+ compound_profile.profile_as_string, compound_profile.max_value, compound_profile.min_value,
+ compound_profile.max_timepoint, compound_profile.min_timepoint
+ FROM CompoundAttributes ca,
+ (
+ SELECT * FROM (
+ WITH result AS (
+ SELECT max(value) as value, compound_id, protocol_app_node_id, isotopomer
+ FROM results.CompoundMassSpec
+ GROUP BY compound_id, protocol_app_node_id, isotopomer
+ ) , min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.compound_id, result.isotopomer,
+ last_value(result.value) over w1 as max_value,
+ first_value(result.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.compound_id, result.isotopomer
+ ORDER BY result.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.compound_id, result.isotopomer, 'values' as profile_type, s.node_type,
+ NULL::numeric as compound_peaks_id, NULL::numeric as mass, NULL::numeric as retention_time,
+ string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.compound_id = result.compound_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.compound_id, result.isotopomer, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t1
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, cpc.compound_id,
+ last_value(cmsr.value) over w1 as max_value,
+ first_value(cmsr.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp,
+ apidb.CompoundPeaksChebi cpc
+ WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id
+ AND cmsr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND cp.compound_peaks_id = cmsr.compound_peaks_id
+ AND cpc.compound_peaks_id = cp.compound_peaks_id
+ AND pan.name like '%mean%'
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time
+ ORDER BY cmsr.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, cpc.compound_id, cpc.isotopomer, 'values' as profile_type, s.node_type,
+ cpc.compound_peaks_id, cp.mass, cp.retention_time,
+ string_agg(coalesce(round(cmsr.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp,
+ apidb.CompoundPeaksChebi cpc, min_max
+ WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id
+ AND cmsr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND cp.compound_peaks_id = cmsr.compound_peaks_id
+ AND cpc.compound_peaks_id = cp.compound_peaks_id
+ AND pan.name like '%mean%'
+ AND min_max.compound_id = cpc.compound_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t2
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, cpc.compound_id,
+ last_value(cmsr.percentile) over w1 as max_value,
+ first_value(cmsr.percentile) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp,
+ apidb.CompoundPeaksChebi cpc
+ WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id
+ AND cmsr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND cp.compound_peaks_id = cmsr.compound_peaks_id
+ AND cpc.compound_peaks_id = cp.compound_peaks_id
+ AND pan.name like '%mean%'
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time
+ ORDER BY cmsr.percentile ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, cpc.compound_id, cpc.isotopomer, 'percentiles' as profile_type, s.node_type,
+ cpc.compound_peaks_id, cp.mass, cp.retention_time,
+ string_agg(coalesce(round(cmsr.percentile::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp,
+ apidb.CompoundPeaksChebi cpc, min_max
+ WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id
+ AND cmsr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND cp.compound_peaks_id = cmsr.compound_peaks_id
+ AND cpc.compound_peaks_id = cp.compound_peaks_id
+ AND pan.name like '%mean%'
+ AND min_max.compound_id = cpc.compound_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t3
+ ) compound_profile
+ WHERE ca.id = compound_profile.compound_id
+ UNION ALL
+ -- OntologyTermResult
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, ot.name,
+ last_value(otr.value) over w1 as max_value,
+ first_value(otr.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.OntologyTermResult otr, sres.OntologyTerm ot
+ WHERE ot.ontology_term_id = otr.ontology_term_id
+ AND otr.protocol_app_node_id = sl.protocol_app_node_id
+ AND otr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, ot.name
+ ORDER BY otr.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, ot.name as source_id, 'value' as profile_type, s.node_type,
+ string_agg(coalesce(round(otr.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.OntologyTermResult otr, sres.OntologyTerm ot, min_max
+ WHERE ot.ontology_term_id = otr.ontology_term_id
+ AND otr.protocol_app_node_id = sl.protocol_app_node_id
+ AND otr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.name = ot.name
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, ot.name, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t1
+ UNION ALL
+ -- SubjectResult
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.subject,
+ last_value(result.value) over w1 as max_value,
+ first_value(result.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.SubjectResult result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.subject
+ ORDER BY result.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.subject as source_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, min_max, apidb.SubjectResult result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.subject = result.subject
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.subject, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t2
+ UNION ALL
+ -- wgcna eigengene results
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.module_name,
+ last_value(result.value) over w1 as max_value,
+ first_value(result.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.EigenGeneWgcnaResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.module_name
+ ORDER BY result.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.module_name as source_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, min_max, apidb.EigenGeneWgcnaResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.module_name = result.module_name
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.module_name, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t3
+ ) profile
+ WHERE ds.name = d.name
+ AND ds.version = r.version
+ AND d.external_database_id = r.external_database_id
+ AND profile.node_set_id = ps.node_set_id
+ AND ps.external_database_release_id = r.external_database_release_id
+ )
+ LOOP
+ ctrows := ctrows + 1;
+ INSERT INTO Profile
+ (DATASET_NAME, DATASET_TYPE, DATASET_SUBTYPE, PROFILE_TYPE, NODE_TYPE, SOURCE_ID, PROFILE_STUDY_ID, PROFILE_SET_NAME,
+ PROFILE_SET_SUFFIX, PROFILE_AS_STRING, MAX_VALUE, MIN_VALUE, MAX_TIMEPOINT, MIN_TIMEPOINT)
+ VALUES
+ (pf_rows.DATASET_NAME, pf_rows.DATASET_TYPE, pf_rows.DATASET_SUBTYPE, pf_rows.PROFILE_TYPE, pf_rows.NODE_TYPE, pf_rows.SOURCE_ID, pf_rows.PROFILE_STUDY_ID, pf_rows.PROFILE_SET_NAME,
+ pf_rows.PROFILE_SET_SUFFIX, pf_rows.PROFILE_AS_STRING, pf_rows.MAX_VALUE, pf_rows.MIN_VALUE, pf_rows.MAX_TIMEPOINT, pf_rows.MIN_TIMEPOINT);
+ IF ctrows >= commit_after THEN
+ COMMIT;
+ ctrows := 0;
+ END IF;
+ END LOOP;
+ commit;
+ END;
+ $$ LANGUAGE PLPGSQL;
+
+ ;
+
+
+
+ UPDATE Profile
+ SET dataset_name = 'tbruTREU927_Rijo_Circadian_Regulation_rnaSeq_RSRC'
+ WHERE dataset_name= 'tbruTREU927_RNASeq_Rijo_Circadian_Regulation_RSRC'
+
+ ;
+
diff --git a/Model/lib/psql/webready/keep/ProfileSamples.psql b/Model/lib/psql/webready/keep/ProfileSamples.psql
new file mode 100644
index 000000000..e2ed3d0b2
--- /dev/null
+++ b/Model/lib/psql/webready/keep/ProfileSamples.psql
@@ -0,0 +1,167 @@
+:CREATE_AND_POPULATE
+
+
+ CREATE TABLE ProfileSamples AS
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id as study_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, results.nafeatureexpression r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ AND pt.profile_type not in ('pvalue', 'period', 'amplitude', 'probability_mean','sd','lower_CI','upper_CI','correlation_coefficient')
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.NAFeatureMetacycle r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ AND pt.profile_type not in ('values', 'channel1_percentiles', 'channel2_percentiles', 'standard_error')
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.LopitResults r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ AND pt.profile_type not in ('values', 'channel1_percentiles', 'channel2_percentiles', 'standard_error')
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, results.compoundMassSpec r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, APIDB.compoundmassspecresult r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ and pan.name like '%mean%'
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.ontologytermresult r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, results.nafeaturehostresponse r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.crisprphenotype r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.phenotypescore r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.phenotypegrowthrate r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.subjectresult r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.EigenGeneWgcnaResults r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/keep/ProfileSamples_ix.psql b/Model/lib/psql/webready/keep/ProfileSamples_ix.psql
new file mode 100644
index 000000000..d59cf5e21
--- /dev/null
+++ b/Model/lib/psql/webready/keep/ProfileSamples_ix.psql
@@ -0,0 +1,14 @@
+ create index psamp_ix
+ on :SCHEMA.ProfileSamples
+ (dataset_name, profile_type, study_id, node_order_num,
+ protocol_app_node_id, profile_set_suffix, study_name,
+ node_type, protocol_app_node_name)
+ ;
+
+ create index psampstdy_ix
+ on :SCHEMA.ProfileSamples
+ (study_name, node_type, profile_type, node_order_num,
+ protocol_app_node_id, profile_set_suffix, study_id,
+ protocol_app_node_name, dataset_name)
+ ;
+
diff --git a/Model/lib/psql/webready/keep/ProfileType.psql b/Model/lib/psql/webready/keep/ProfileType.psql
new file mode 100644
index 000000000..0f0f83616
--- /dev/null
+++ b/Model/lib/psql/webready/keep/ProfileType.psql
@@ -0,0 +1,13 @@
+:CREATE_AND_POPULATE
+
+
+ CREATE TABLE ProfileType AS
+ SELECT DISTINCT dataset_name, profile_study_id, profile_set_name, profile_set_suffix, node_type, profile_type,
+ dataset_type, dataset_subtype
+ FROM profile
+ WHERE profile_as_string IS NOT NULL
+ ORDER BY dataset_name, profile_set_name, profile_type
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/keep/ProfileType_ix.psql b/Model/lib/psql/webready/keep/ProfileType_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/keep/Profile_ix.psql b/Model/lib/psql/webready/keep/Profile_ix.psql
new file mode 100644
index 000000000..46b746ade
--- /dev/null
+++ b/Model/lib/psql/webready/keep/Profile_ix.psql
@@ -0,0 +1,12 @@
+ create exprof_idx
+ on :SCHEMA.Profile (source_id, profile_type, profile_set_name)
+ ;
+
+ create profset_idx
+ on :SCHEMA.Profile (profile_set_name, profile_type)
+ ;
+
+ create srcdset_idx
+ on :SCHEMA.Profile (source_id, dataset_subtype, dataset_type)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql
new file mode 100644
index 000000000..a29541ffe
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql
@@ -0,0 +1,41 @@
+ :CREATE_AND_POPULATE
+ SELECT DISTINCT ta.source_id, ta.gene_source_id, sr.protocol_app_node_id,
+ ta.project_id, ta.org_abbrev, current_timestamp as modification_date,
+ CASE
+ WHEN ta.is_reversed = 0
+ THEN round(abs(ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)),0)
+ ELSE round(abs(ta.end_max - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)),0)
+ END as distance,
+ CASE
+ WHEN /* distance > 0 */
+ CASE WHEN ta.is_reversed = 0
+ THEN ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)
+ ELSE ta.end_max - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)
+ END > 0
+ THEN
+ CASE
+ WHEN ta.is_reversed = 0
+ THEN '-'
+ ELSE '+'
+ END
+ ELSE
+ CASE
+ WHEN ta.is_reversed = 1
+ THEN '-'
+ ELSE '+'
+ END
+ END as direction,
+ sr.score1 as score
+ FROM :SCHEMA.TranscriptAttributes ta,
+ Results.segmentresult sr,
+ Study.nodenodeset sl,
+ Study.nodeset s
+ WHERE sr.na_sequence_id = ta.na_sequence_id
+ AND s.node_set_id = sl.node_set_id
+ AND sl.protocol_app_node_id = sr.protocol_app_node_id
+ AND lower(s.name) like '%chip%peaks'
+ AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.start_min) <= 3000)
+ or (ta.is_reversed = 1 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.end_max) <= 3000) )
+ AND ta.org_abbrev = ':ORG_ABBREV'
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript_ix.psql b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript_ix.psql
new file mode 100644
index 000000000..11d4fa447
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript_ix.psql
@@ -0,0 +1,3 @@
+ create index chpgene_geneid_idx ON :SCHEMA.ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers.psql b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers.psql
new file mode 100644
index 000000000..eafe7652b
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers.psql
@@ -0,0 +1,22 @@
+:CREATE_AND_POPULATE
+ SELECT DISTINCT
+ ta.project_id
+ , ta.org_abbrev
+ , current_timestamp as modification_date
+ , ta.na_sequence_id
+ , ta.chromosome
+ , ccn.chr_copy_number AS ploidy
+ , io.input_pan_id
+ , io.output_pan_id
+ FROM apidb.ChrCopyNumber ccn
+ , :SCHEMA.TranscriptAttributes ta
+ , :SCHEMA.PANIo io
+ WHERE ta.na_sequence_id = ccn.na_sequence_id
+ AND ta.chromosome IS NOT NULL
+ AND ccn.protocol_app_node_id = io.output_pan_id
+ and ta.org_abbrev = ':ORG_ABBREV'
+ and io.org_abbrev = ':ORG_ABBREV'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql
new file mode 100644
index 000000000..c5a7eff54
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql
@@ -0,0 +1,9 @@
+ CREATE index ChrCN_ix
+ ON :SCHEMA.ChrCopyNumbers (input_pan_id, na_sequence_id)
+ ;
+
+
+ CREATE index ChrCN_output
+ ON :SCHEMA.ChrCopyNumbers (output_pan_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/CodingSequence.psql b/Model/lib/psql/webready/orgSpecific/CodingSequence.psql
new file mode 100644
index 000000000..a1682717e
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/CodingSequence.psql
@@ -0,0 +1,11 @@
+:CREATE_AND_POPULATE
+ SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ ta.source_id,
+ SUBSTR(sns.sequence, tf.translation_start::INTEGER,
+ tf.translation_stop::INTEGER - tf.translation_start::INTEGER + 1) as sequence
+ FROM :SCHEMA.TranscriptAttributes ta, dots.SplicedNaSequence sns, dots.TranslatedAaFeature tf
+ WHERE ta.source_id = sns.source_id
+ AND ta.na_feature_id = tf.na_feature_id
+ AND ta.org_abbrev = ':ORG_ABBREV'
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/CodingSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/CodingSequence_ix.psql
new file mode 100644
index 000000000..e580f236f
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/CodingSequence_ix.psql
@@ -0,0 +1,3 @@
+ create index CodSeq_ix on :SCHEMA.CodingSequence (source_id, project_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/EqtlSpan.psql b/Model/lib/psql/webready/orgSpecific/EqtlSpan.psql
new file mode 100644
index 000000000..fdad100be
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/EqtlSpan.psql
@@ -0,0 +1,24 @@
+:CREATE_AND_POPULATE
+ SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ gene_source_id, haplotype_block_name as hapblock_id, sequence_id,
+ start_min, end_max, start_max, end_min,
+ max(score) as lod_score, organism
+ FROM (SELECT ga.gene_source_id, ga.project_id, gls.haplotype_block_name,
+ ens.source_id as sequence_id, nl.start_min, nl.end_max, nl.start_max, nl.end_min,
+ gls.lod_score_mant * power(10::double precision, gls.lod_score_exp) as score,
+ replace (ga.organism, ' ', '+') as organism
+ FROM dots.ChromosomeElementFeature cef, apidb.NAFeatureHaploblock gls,
+ dots.ExternalNaSequence ens, dots.NaLocation nl, :SCHEMA.TranscriptAttributes ga
+ WHERE gls.na_feature_id = ga.gene_na_feature_id
+ AND cef.name = gls.haplotype_block_name
+ AND nl.na_feature_id = cef.na_feature_id
+ AND cef.na_sequence_id = ens.na_sequence_id
+ AND ga.org_abbrev = ':ORG_ABBREV'
+ AND (gls.lod_score_mant * power(10::double precision, gls.lod_score_exp)) >= 1.5
+ ) t
+ GROUP BY gene_source_id, project_id, sequence_id, haplotype_block_name,
+ start_min, end_max, start_max, end_min, organism
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/EqtlSpan_ix.psql b/Model/lib/psql/webready/orgSpecific/EqtlSpan_ix.psql
new file mode 100644
index 000000000..9238b045e
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/EqtlSpan_ix.psql
@@ -0,0 +1,4 @@
+ create index eqtlSpan_ix
+ on :SCHEMA.eqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql
new file mode 100644
index 000000000..c4546c4fe
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql
@@ -0,0 +1,74 @@
+ drop table if exists :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp AS
+ SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession,
+ e.library_id, ba.query_taxon_id, ba.target_na_sequence_id,
+ ba.target_taxon_id, ba.percent_identity, ba.is_consistent,
+ ba.is_best_alignment, ba.is_reversed, ba.target_start, ba.target_end,
+ ga.sequence_id AS target_sequence_source_id,
+ least(ba.target_end, ga.gene_end_max)
+ - greatest(ba.target_start, ga.gene_start_min) + 1
+ AS est_gene_overlap_length,
+ ba.query_bases_aligned / (query_sequence.length)
+ * 100 AS percent_est_bases_aligned,
+ ga.gene_source_id AS gene,':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM dots.BlatAlignment ba, dots.Est e,
+ :SCHEMA.TranscriptAttributes ga,
+ dots.NaSequence query_sequence
+ WHERE e.na_sequence_id = ba.query_na_sequence_id
+ AND ga.na_sequence_id = ba.target_na_sequence_id
+ AND least(ba.target_end, ga.gene_end_max) - greatest(ba.target_start, ga.gene_start_min) >= 0
+ AND query_sequence.na_sequence_id = ba.query_na_sequence_id
+ AND ga.org_abbrev = ':ORG_ABBREV'
+
+ ;
+
+ drop table if exists :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp AS
+ SELECT * from :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp WHERE 1=0 UNION /* define datatype for null column */
+ SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession,
+ e.library_id, ba.query_taxon_id, ba.target_na_sequence_id,
+ ba.target_taxon_id, ba.percent_identity, ba.is_consistent,
+ ba.is_best_alignment, ba.is_reversed, ba.target_start, ba.target_end,
+ sequence.source_id AS target_sequence_source_id,
+ NULL AS est_gene_overlap_length,
+ ba.query_bases_aligned / (query_sequence.length)
+ * 100 AS percent_est_bases_aligned,
+ NULL AS gene,':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM dots.BlatAlignment ba, dots.Est e, dots.AssemblySequence aseq,
+ dots.NaSequence sequence, dots.NaSequence query_sequence
+ WHERE e.na_sequence_id = ba.query_na_sequence_id
+ AND e.na_sequence_id = query_sequence.na_sequence_id
+ AND aseq.na_sequence_id = ba.query_na_sequence_id
+ AND ba.target_na_sequence_id = sequence.na_sequence_id
+ AND sequence.taxon_id = :TAXON_ID
+ AND ba.blat_alignment_id IN
+ ( /* set of blat_alignment_ids not in in first leg of UNION */
+ /* (because they overlap no genes) */
+ SELECT ba.blat_alignment_id
+ FROM dots.BlatAlignment ba, dots.NaSequence query_sequence,
+ sres.OntologyTerm so, dots.NaSequence target_sequence
+ WHERE query_sequence.na_sequence_id = ba.query_na_sequence_id
+ AND query_sequence.sequence_ontology_id = so.ontology_term_id
+ AND ba.target_na_sequence_id = target_sequence.na_sequence_id
+ AND target_sequence.taxon_id = :TAXON_ID
+ AND so.name = 'EST'
+ EXCEPT
+ SELECT blat_alignment_id FROM :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp)
+
+ ;
+
+:CREATE_AND_POPULATE
+
+ SELECT * FROM :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp
+ UNION
+ SELECT * FROM :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp
+
+
+:DECLARE_PARTITION;
+
+drop table :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp;
+drop table :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp;
diff --git a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary_ix.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary_ix.psql
new file mode 100644
index 000000000..adac5eceb
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary_ix.psql
@@ -0,0 +1,20 @@
+
+
+ create index EstSumm_libOverlap_ix
+ ON :SCHEMA.EstAlignmentGeneSummary
+ (library_id, percent_identity, is_consistent,
+ est_gene_overlap_length, percent_est_bases_aligned)
+
+
+ ;
+
+
+
+ create index EstSumm_estSite_ix
+ ON :SCHEMA.EstAlignmentGeneSummary
+ (target_sequence_source_id, target_start, target_end,
+ library_id)
+
+
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
new file mode 100644
index 000000000..4564bd263
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
@@ -0,0 +1,59 @@
+:CREATE_AND_POPULATE
+ SELECT
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ ens.source_id,
+ e.seq_primer AS primer,
+ ens.a_count,
+ ens.c_count,
+ ens.g_count,
+ ens.t_count,
+ (ens.length - (ens.a_count + ens.c_count + ens.g_count + ens.t_count))
+ AS other_count,
+ ens.length,
+ replace(l.dbest_name, '''', '-') as dbest_name,
+ coalesce(regexp_replace(l.vector, '^\s+$', null), 'unknown') AS vector,
+ coalesce(regexp_replace(l.stage, '^\s+$', null), 'unknown') AS stage,
+ SUBSTR(CASE
+ WHEN tn.name = 'Giardia lamblia' THEN 'Giardia Assemblage A isolate WB'
+ ELSE tn.name
+ END, 1, 100) AS organism,
+ taxon.ncbi_tax_id,
+ ed.name AS external_db_name,
+ coalesce(best.best_alignment_count, 0) AS best_alignment_count,
+ l.library_id, replace(l.dbest_name, '''', '-') as library_dbest_name
+ FROM dots.Est e, dots.Library l, sres.Taxon, sres.OntologyTerm oterm,
+ sres.TaxonName tn, sres.ExternalDatabase ed,
+ apidb.datasource ds, apidb.organism o,
+ sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens
+ LEFT JOIN
+ (select query_na_sequence_id,max(ct) as best_alignment_count
+ from (
+ SELECT query_na_sequence_id, COUNT(*) AS ct
+ FROM dots.BlatAlignment ba, apidb.datasource ds, apidb.organism o,
+ sres.externaldatabase d, sres.externaldatabaserelease r
+ WHERE is_best_alignment = 1
+ AND ba.query_external_db_release_id = r.external_database_release_id
+ AND r.external_database_id = d.external_database_id
+ AND d.name = ds.name
+ AND ds.taxon_id = o.taxon_id
+ AND o.is_reference_strain = 1
+ AND o.taxon_id = :TAXON_ID
+ GROUP BY target_external_db_release_id,query_na_sequence_id) t
+ group by query_na_sequence_id
+ ) best ON ens.na_sequence_id = best.query_na_sequence_id
+ WHERE e.na_sequence_id = ens.na_sequence_id
+ AND e.library_id = l.library_id
+ AND ens.taxon_id = tn.taxon_id
+ AND ens.taxon_id = taxon.taxon_id
+ AND tn.name_class='scientific name'
+ AND ens.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND ens.sequence_ontology_id = oterm.ontology_term_id
+ AND ed.name = ds.name
+ and ds.taxon_id = o.taxon_id
+ and o.is_reference_strain = 1
+ and o.taxon_id = :TAXON_ID
+ AND oterm.name = 'EST'
+:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql
new file mode 100644
index 000000000..3cad24bdc
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql
@@ -0,0 +1,7 @@
+
+
+ create unique index EstAttr_source_id ON :SCHEMA.EstAttributes (org_abbrev, source_id)
+
+
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/EstSequence.psql b/Model/lib/psql/webready/orgSpecific/EstSequence.psql
new file mode 100644
index 000000000..58bc1dfd9
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/EstSequence.psql
@@ -0,0 +1,21 @@
+:CREATE_AND_POPULATE
+ SELECT ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ , ens.source_id
+ , ens.sequence
+ FROM dots.ExternalNaSequence ens
+ , sres.OntologyTerm oterm
+ , apidb.organism o
+ , sres.externaldatabase d
+ , sres.externaldatabaserelease r
+ , apidb.datasource ds
+ WHERE oterm.name = 'EST'
+ AND oterm.ontology_term_id = ens.sequence_ontology_id
+ AND ens.external_database_release_id = r.external_database_release_id
+ AND r.external_database_id = d.external_database_id
+ AND d.name = ds.name
+ AND ds.taxon_id = o.taxon_id
+ AND ds.taxon_id = :TAXON_ID
+ AND o.is_reference_strain = 1
+:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/EstSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/EstSequence_ix.psql
new file mode 100644
index 000000000..d124b417b
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/EstSequence_ix.psql
@@ -0,0 +1,3 @@
+ create index EstSeq_ix on :SCHEMA.EstSequence (source_id, project_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
new file mode 100644
index 000000000..7ee49f52c
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
@@ -0,0 +1,119 @@
+:CREATE_AND_POPULATE
+ SELECT DISTINCT ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ , ta.gene_source_id AS source_id
+ , gene_na_feature_id AS na_feature_id
+ , na_sequence_id
+ , is_reversed
+ , gene_start_min AS start_min
+ , gene_end_max AS end_max
+ , CASE strand WHEN 'forward' THEN '+' WHEN 'reverse' THEN '-' ELSE null END as strand_plus_minus
+ , sequence_id
+ , gene_name AS name
+ , COALESCE(aggregates.product, aggregates.transcript_product) as old_product
+ , COALESCE(gp.product, 'unspecified product') as product
+ , 'FIX ME' as product
+ , gene_type
+ , gene_ebi_biotype
+ , gene_id
+ , aggregates.is_pseudo
+ , organism
+ , species
+ , taxon_id
+ , species as genus_species
+ , strain
+ , ncbi_tax_id
+ , so_id
+ , so_term_name
+ , so_term_definition
+ , so_version
+ , anticodon
+ , external_db_name
+ , external_db_version
+ , external_db_rls_id
+ , chromosome
+ , chromosome_order_num
+ , sequence_type
+ , gene_transcript_count AS transcript_count
+ , gene_exon_count as exon_count
+ , gene_previous_ids as previous_ids
+ , is_deprecated
+ , gene_paralog_number as paralog_number
+ , gene_ortholog_number as ortholog_number
+ , gene_context_start as context_start
+ , gene_context_end as context_end
+ , orthomcl_name
+ -- , gene_total_hts_snps as total_hts_snps
+ -- , gene_hts_nonsynonymous_snps as hts_nonsynonymous_snps
+ -- , gene_hts_stop_codon_snps as hts_stop_codon_snps
+ -- , gene_hts_noncoding_snps as hts_noncoding_snps
+ -- , gene_hts_synonymous_snps as hts_synonymous_snps
+ -- , gene_hts_nonsyn_syn_ratio as hts_nonsyn_syn_ratio
+ , comment_string
+ , uniprot.uniprot_id
+ , uniprot.uniprot_id_internal
+ , gene_entrez_id as entrez_id
+ , representative_transcript
+ , gene_zoom_context_start as zoom_context_start
+ , gene_zoom_context_end as zoom_context_end
+ , cast (null as numeric) as strain_count
+ , ta.gene_locations as locations
+ FROM :SCHEMA.TranscriptAttributes ta
+ INNER JOIN (
+ SELECT gene_source_id, MIN(is_pseudo) AS is_pseudo,
+ --MIN(gene_product) AS product,
+ 'FIX ME' AS product,
+ substr(STRING_AGG(transcript_product, ',' order by transcript_product), 1, 240) as transcript_product
+ FROM :SCHEMA.TranscriptAttributes
+ WHERE org_abbrev = ':ORG_ABBREV'
+ GROUP BY gene_source_id
+ ) aggregates ON ta.gene_source_id = aggregates.gene_source_id
+ LEFT JOIN (
+ SELECT na_feature_id,
+ substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id,
+ substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal
+ FROM (
+ SELECT distinct t.gene_na_feature_id as na_feature_id, dr.primary_identifier as uniprot_id
+ FROM sres.DbRef dr, dots.DbRefNaFeature x, :SCHEMA.Transcriptattributes t,
+ sres.ExternalDatabase d, sres.ExternalDatabaseRelease r
+ WHERE dr.db_ref_id = x.DB_REF_ID
+ AND t.org_abbrev = ':ORG_ABBREV'
+ AND (x.na_feature_id = t.na_feature_id OR x.na_feature_id = t.gene_na_feature_id)
+ AND dr.external_database_release_id = r.external_database_release_id
+ AND r.external_database_id = d.external_database_id
+ AND (d.name like '%uniprot_dbxref_RSRC'
+ OR d.name like '%dbxref_gene2Uniprot_RSRC'
+ OR d.name = 'Links to Uniprot Genes'
+ OR d.name like '%_dbxref_uniprot_from_annotation_RSRC')
+ ) t
+ GROUP BY na_feature_id
+ ) uniprot ON ta.gene_na_feature_id = uniprot.na_feature_id and ta.org_abbrev = ':ORG_ABBREV'
+ LEFT JOIN :SCHEMA.GeneProduct gp ON ta.gene_source_id = gp.source_id and gp.org_abbrev = ':ORG_ABBREV'
+ ORDER BY ta.gene_source_id
+:DECLARE_PARTITION;
+
+ drop table if exists :SCHEMA.:ORG_ABBREVSpeciesInfoTmp;
+
+ CREATE unlogged TABLE :SCHEMA.:ORG_ABBREVSpeciesInfoTmp as
+ SELECT genus_species, count(distinct organism) as strain_count
+ FROM :SCHEMA.GeneAttributes
+ WHERE org_abbrev = ':ORG_ABBREV'
+ GROUP BY genus_species
+
+ ;
+
+
+
+ UPDATE :SCHEMA.GeneAttributes ga
+ SET strain_count = (
+ SELECT strain_count
+ FROM :SCHEMA.:ORG_ABBREVSpeciesInfoTmp si
+ WHERE si.genus_species = ga.genus_species
+ )
+ WHERE org_abbrev = ':ORG_ABBREV'
+
+ ;
+
+drop table :SCHEMA.:ORG_ABBREVSpeciesInfoTmp
+;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql
new file mode 100644
index 000000000..2e66f521e
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql
@@ -0,0 +1,47 @@
+ CREATE UNIQUE INDEX GeneAttr_srcPrj
+ ON :SCHEMA.GeneAttributes (org_abbrev, source_id)
+ ;
+
+ CREATE INDEX GeneAttr_exon_ix
+ ON :SCHEMA.GeneAttributes (exon_count, source_id, project_id)
+ ;
+
+ CREATE INDEX GeneAttr_loc_ix
+ ON :SCHEMA.GeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated)
+ ;
+
+ CREATE INDEX GeneAttr_feat_ix
+ ON :SCHEMA.GeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed)
+ ;
+
+ CREATE INDEX GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes (
+ orthomcl_name, source_id, taxon_id, gene_type, na_feature_id,
+ na_sequence_id, start_min, end_max, organism, species,
+ product, project_id
+ )
+ ;
+
+ CREATE INDEX GeneAttr_ortholog_ix
+ ON :SCHEMA.GeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id)
+ ;
+
+ CREATE INDEX GeneAttr_orgsrc_ix
+ ON :SCHEMA.GeneAttributes (organism, source_id, na_sequence_id, start_min, end_max)
+ ;
+
+ CREATE INDEX GeneAttr_prjsrc_ix
+ ON :SCHEMA.GeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0))
+ ;
+
+ CREATE INDEX GeneAttr_txid_ix
+ ON :SCHEMA.GeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id)
+ ;
+
+ CREATE INDEX GeneAttr_ids_ix
+ ON :SCHEMA.GeneAttributes (na_feature_id, source_id, project_id)
+ ;
+
+ CREATE INDEX GeneAttr_loc_intjunc_ix
+ ON :SCHEMA.GeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneCopyNumbers.psql b/Model/lib/psql/webready/orgSpecific/GeneCopyNumbers.psql
new file mode 100644
index 000000000..fab9996a9
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneCopyNumbers.psql
@@ -0,0 +1,30 @@
+:CREATE_AND_POPULATE
+ SELECT DISTINCT ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ , ta.source_id
+ , ta.gene_source_id
+ , REGEXP_REPLACE(pan.name, '_[A-Za-z0-9]+ (.+)$', '') AS strain
+ , gcn.haploid_number AS raw_estimate
+ , gcn.ref_copy_number AS ref_cn
+ , CASE WHEN (gcn.haploid_number < 0.01) THEN 0
+ WHEN (0.01 < gcn.haploid_number AND gcn.haploid_number < 1.85) THEN 1
+ ELSE round(gcn.haploid_number) END AS haploid_number
+ , ta.chromosome
+ , ta.na_sequence_id
+ , io.input_pan_id
+ , io.output_pan_id
+ FROM apidb.genecopynumber gcn
+ , study.protocolappnode pan
+ , :SCHEMA.TranscriptAttributes ta
+ , :SCHEMA.PANIo io
+ WHERE gcn.protocol_app_node_id = pan.protocol_app_node_id
+ AND gcn.na_feature_id = ta.gene_na_feature_id
+ AND gcn.protocol_app_node_id = io.output_pan_id
+ AND (ta.gene_type = 'protein coding' or ta.gene_type = 'protein coding gene')
+ AND ta.org_abbrev = ':ORG_ABBREV'
+ AND io.org_abbrev = ':ORG_ABBREV'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneCopyNumbers_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneCopyNumbers_ix.psql
new file mode 100644
index 000000000..084742ec0
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneCopyNumbers_ix.psql
@@ -0,0 +1,4 @@
+ CREATE INDEX GeneCN_ix
+ ON :SCHEMA.GeneCopyNumbers (input_pan_id, na_sequence_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneGoTable.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTable.psql
new file mode 100644
index 000000000..25fa84491
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneGoTable.psql
@@ -0,0 +1,29 @@
+:CREATE_AND_POPULATE
+ SELECT source_id,
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ go_id,
+ string_agg(transcript_source_id, ', ' order by transcript_source_id) as transcript_ids,
+ is_not,
+ max(go_term_name) as go_term_name, ontology, source, evidence_code,
+ reference, evidence_code_parameter, sort_key
+ FROM (SELECT DISTINCT ggt.gene_source_id as source_id, ga.project_id,
+ replace(ggt.go_id, 'GO_', 'GO:') as go_id,
+ ggt.transcript_source_id,
+ case when ggt.is_not = 'not' then 'Is not' else '' end AS is_not,
+ ggt.go_term_name, ggt.ontology, ggt.source, ggt.evidence_code,
+ ggt.reference, ggt.evidence_code_parameter,
+ substr(ggt.ontology, 1, 1) || replace(ggt.go_id, 'GO_', 'GO:') as sort_key
+ FROM :SCHEMA.GeneGoTerms ggt, :SCHEMA.GeneAttributes ga
+ WHERE ggt.gene_source_id = ga.source_id
+ and ggt.org_abbrev = ':ORG_ABBREV'
+ and ga.org_abbrev = ':ORG_ABBREV'
+ ) t
+ GROUP BY source_id, project_id, go_id, is_not, ontology,
+ source, evidence_code, reference, evidence_code_parameter, sort_key
+ ORDER BY source_id, ontology, go_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql
new file mode 100644
index 000000000..5e34d5288
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql
@@ -0,0 +1,5 @@
+ create index ggtab_ix ON :SCHEMA.GeneGoTable
+ (source_id, project_id, go_id, transcript_ids, is_not, go_term_name,
+ ontology, source, evidence_code, reference, evidence_code_parameter, sort_key)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneGoTerms.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTerms.psql
new file mode 100644
index 000000000..87fec450b
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneGoTerms.psql
@@ -0,0 +1,37 @@
+:CREATE_AND_POPULATE
+ with root_term
+ as (select ontology_term_id,
+ cast(initcap(replace(name, '_', ' ')) as varchar(20)) as ontology
+ from sres.OntologyTerm
+ where source_id in ('GO_0008150','GO_0003674','GO_0005575'))
+ select ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ gf.source_id as gene_source_id, t.source_id as transcript_source_id, taf.aa_sequence_id,
+ cast (CASE ga.is_not WHEN 0 THEN '' WHEN 1 THEN 'not' ELSE ga.is_not::varchar END as varchar(3)) as is_not, ns.taxon_id,
+ cast (gt.source_id as varchar(20)) as go_id,
+ gt.ontology_term_id as go_term_id, rt.ontology,
+ cast(gt.name as varchar(250)) as go_term_name,
+ cast(gail.name as varchar(24)) as source,
+ cast(gec.name as varchar(12)) as evidence_code,
+ cast(gaiec.reference as varchar(250)) as reference,
+ cast(gaiec.evidence_code_parameter as varchar(80))as evidence_code_parameter
+ from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf, dots.GoAssociation ga,
+ dots.GoAssociationInstance gai, dots.GoAssociationInstanceLoe gail,
+ dots.GoAssocInstEvidCode gaiec, sres.OntologyTerm gec, dots.NaSequence ns,
+ sres.OntologyTerm gt LEFT JOIN root_term rt ON gt.ancestor_term_id = rt.ontology_term_id
+ where t.parent_id = gf.na_feature_id
+ and gf.na_sequence_id = ns.na_sequence_id
+ and ns.taxon_id = :TAXON_ID
+ and t.na_feature_id = taf.na_feature_id
+ and taf.aa_sequence_id = ga.row_id
+ and ga.table_id = (select table_id
+ from core.TableInfo
+ where name = 'TranslatedAASequence')
+ and ga.go_term_id = gt.ontology_term_id
+ and ga.go_association_id = gai.go_association_id
+ and gai.go_assoc_inst_loe_id = gail.go_assoc_inst_loe_id
+ and gai.go_association_instance_id = gaiec.go_association_instance_id
+ and gaiec.go_evidence_code_id = gec.ontology_term_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneGoTerms_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTerms_ix.psql
new file mode 100644
index 000000000..d8456c06e
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneGoTerms_ix.psql
@@ -0,0 +1,6 @@
+ create index ggt_ix ON :SCHEMA.GeneGoTerms
+ (gene_source_id, transcript_source_id, ontology, go_id, go_term_id,
+ go_term_name, source, evidence_code, reference,
+ evidence_code_parameter, aa_sequence_id, is_not)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneId.psql b/Model/lib/psql/webready/orgSpecific/GeneId.psql
new file mode 100644
index 000000000..ec39a78b7
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneId.psql
@@ -0,0 +1,288 @@
+drop table if exists :SCHEMA.:ORG_ABBREVGeneFeatureTmp;
+
+create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureTmp as
+(select gf.na_feature_id
+ , gf.na_sequence_id
+ , gf.external_database_release_id
+ , gf.is_predicted
+ , gf.source_id
+ from dots.genefeature gf
+ , dots.nasequence nas
+ where gf.na_sequence_id = nas.na_sequence_id
+ and nas.taxon_id = :TAXON_ID
+)
+;
+
+create index :ORG_ABBREV_GeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id)
+;
+
+
+:CREATE_AND_POPULATE
+ SELECT substr(mapping.id, 1, 100) as id, mapping.gene, cast (0 as NUMERIC(1)) as unique_mapping,
+ SUBSTR(string_agg(distinct union_member,'; ' order by union_member), 1, 100) as union_member,
+ SUBSTR(string_agg(distinct database_name,'; ' order by database_name), 1, 200) as database_name,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM (SELECT substr(t.protein_id, 1, nullif(position('.' IN t.protein_id) - 1, -1)) AS id,
+ gf.source_id AS gene,
+ 'Transcript.protein_id before dot' as union_member, ed.name as database_name /* dots.Transcript.protein_id, trimmed at period */
+ FROM dots.Transcript t, :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf,
+ sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+ WHERE t.parent_id = gf.na_feature_id
+ AND substr(t.protein_id, 1, nullif(position('.' IN t.protein_id) - 1, -1)) IS NOT NULL
+ AND gf.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ UNION
+ SELECT t.protein_id AS id,
+ gf.source_id AS gene,
+ 'Transcript.protein_id' as union_member, ed.name as database_name /* dots.Transcript.protein_id */
+ FROM dots.Transcript t, :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf,
+ sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+ WHERE t.parent_id = gf.na_feature_id
+ AND t.protein_id IS NOT NULL
+ AND gf.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ UNION
+ SELECT dr.primary_identifier AS id,
+ gf.source_id AS gene,
+ 'DbRef.primary_identifier' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf,
+ sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+ sres.ExternalDatabase ed
+ WHERE dr.primary_identifier IS NOT NULL
+ AND gf.na_feature_id = drnf.na_feature_id
+ AND drnf.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id
+ = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ -- CHECK AND FIX
+ --AND NOT (ed.name in ('NRDB_gb_dbXRefBySeqIdentity','NRDB_ref_dbXRefBySeqIdentity')
+ -- AND NOT REGEXP_LIKE (dr.primary_identifier, '\D') )
+ AND NOT ed.name in ('NRDB_gb_dbXRefBySeqIdentity','NRDB_ref_dbXRefBySeqIdentity')
+ AND NOT edr.id_type = 'synonym'
+ UNION
+ SELECT dr.primary_identifier AS id,
+ gf.source_id AS gene,
+ 'DbRef.primary_identifier on Transcript' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.Transcript t, dots.DbRefNaFeature drnf,
+ sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+ sres.ExternalDatabase ed
+ WHERE dr.primary_identifier IS NOT NULL
+ AND gf.na_feature_id = t.parent_id
+ AND t.na_feature_id = drnf.na_feature_id
+ AND drnf.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id
+ = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ -- CHECK AND FIX
+ --AND NOT (ed.name in ('NRDB_gb_dbXRefBySeqIdentity','NRDB_ref_dbXRefBySeqIdentity')
+ -- AND NOT REGEXP_LIKE (dr.primary_identifier, '\D') )
+ AND NOT ed.name in ('NRDB_gb_dbXRefBySeqIdentity','NRDB_ref_dbXRefBySeqIdentity')
+ UNION
+ SELECT dr.primary_identifier AS id,
+ gf.source_id AS gene,
+ 'DbRef.primary_identifier on Gene' as union_member, ed.name as database_name
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf,
+ sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+ sres.ExternalDatabase ed
+ WHERE dr.primary_identifier IS NOT NULL
+ AND gf.na_feature_id = drnf.na_feature_id
+ AND drnf.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND ed.name ='RefSeq_gene_name'
+ UNION
+ SELECT dr.primary_identifier AS id,
+ gf.source_id AS gene,
+ 'VectorBase alternate names' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf,
+ sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+ sres.ExternalDatabase ed
+ WHERE dr.primary_identifier IS NOT NULL
+ AND gf.na_feature_id = drnf.na_feature_id
+ AND drnf.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id
+ = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ and ed.name in ('VB_Community_Annotation', 'VB_Community_Symbol') /* vectorbase alt names */
+ UNION
+ SELECT dr.primary_identifier AS id,
+ gf.source_id AS gene,
+ 'synonym' as union_member, ed.name as database_name
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf,
+ sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+ sres.ExternalDatabase ed
+ WHERE dr.primary_identifier IS NOT NULL
+ AND gf.na_feature_id = drnf.na_feature_id
+ AND drnf.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND edr.id_type = 'synonym'
+ UNION
+ SELECT dr.secondary_identifier AS id,
+ gf.source_id AS gene,
+ 'DbRef.secondary_identifier' as union_member, ed.name as database_name /* sres.DbRef.secondary_identifier */
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf,
+ sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+ sres.ExternalDatabase ed
+ WHERE dr.secondary_identifier IS NOT NULL
+ AND gf.na_feature_id = drnf.na_feature_id
+ AND drnf.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id
+ = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND ed.name IN ('NRDB_gb_dbXRefBySeqIdentity',
+ 'NRDB_pdb_dbXRefBySeqIdentity',
+ 'NRDB_ref_dbXRefBySeqIdentity',
+ 'NRDB_sp_dbXRefBySeqIdentity',
+ 'Predicted protein structures','Pf_predictedProteinStructures_RSRC',
+ 'GenBank')
+ UNION
+ SELECT dr.primary_identifier AS id,
+ gf.source_id AS gene,
+ 'genbank DbRef.primary_identifier' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier for Genbank records */
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.Transcript t, dots.DbRefNaSequence drns,
+ sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+ sres.ExternalDatabase ed
+ WHERE gf.na_feature_id = t.parent_id
+ AND t.na_sequence_id = drns.na_sequence_id
+ AND drns.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND ed.name = 'GenBank'
+ UNION
+ SELECT pred_loc.feature_source_id AS id,
+ gene_loc.feature_source_id AS gene,
+ 'overlapping predicted gene source_id' as union_member, ed.name as database_name /* dots.genefeature.source_id for predicted genes that overlap */
+ FROM apidb.FeatureLocation gene_loc, apidb.FeatureLocation pred_loc,
+ sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed,
+ dots.nasequence nas
+ WHERE pred_loc.feature_type = 'GenePrediction'
+ AND gene_loc.feature_type = 'GeneFeature'
+ AND pred_loc.na_sequence_id = gene_loc.na_sequence_id
+ AND gene_loc.na_sequence_id = nas.na_sequence_id
+ and nas.taxon_id = :TAXON_ID
+ AND gene_loc.start_min <= pred_loc.end_max
+ AND gene_loc.end_max >= pred_loc.start_min
+ AND pred_loc.is_reversed = gene_loc.is_reversed
+ AND pred_loc.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ UNION
+ SELECT ng.name AS id, gf.source_id AS gene,
+ 'NaGene' as union_member, ed.name as database_name /* dots.NaGene.name */
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.NaFeatureNaGene nfng, dots.NaGene ng,
+ sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+ WHERE gf.na_feature_id = nfng.na_feature_id
+ AND ng.na_gene_id = nfng.na_gene_id
+ AND gf.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ UNION
+ SELECT source_id AS id, source_id AS gene,
+ 'same ID' as union_member, ed.name as database_name /* same ID (reflexive mapping) */
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf,
+ sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+ WHERE gf.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ UNION
+ SELECT n.name AS id, gf.source_id AS gene,
+ 'gene name' as union_member, d.name as database_name -- apidb.GeneFeatureName.name
+ from :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, sres.ExternalDatabaseRelease r, sres.ExternalDatabase d,
+ ( select na_feature_id, name
+ from apidb.GeneFeatureName
+ where is_preferred = 1
+ EXCEPT
+ -- suppress gene/name associations from the *DELETED_RSRC databases
+ select gfn.na_feature_id, gfn.name
+ from apidb.GeneFeatureName gfn,
+ sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+ where gfn.external_database_release_id = edr.external_database_release_id
+ and ed.external_database_id = edr.external_database_id
+ and ed.name like '%DELETED_RSRC'
+ ) n
+ where n.na_feature_id = gf.na_feature_id
+ and gf.external_database_release_id = r.external_database_release_id
+ and r.external_database_id = d.external_database_id
+ UNION
+ select dr.primary_identifier as id,
+ gf.source_id as gene,
+ 'AA feature DbRef primary ID' as union_member,
+ ed.name as database_name /* DbRef.primary_identifier mapped through DbRefAaFeature */
+ from :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.Transcript t, dots.TranslatedAaFeature taf,
+ dots.DbRefAaFeature draf, sres.DbRef dr,
+ sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+ where gf.na_feature_id = t.parent_id
+ and t.na_feature_id = taf.na_feature_id
+ and taf.aa_feature_id = draf.aa_feature_id
+ and draf.db_ref_id = dr.db_ref_id
+ and dr.external_database_release_id = edr.external_database_release_id
+ and edr.external_database_id = ed.external_database_id
+ and ed.name
+ not in ('INTERPRO', 'PFAM', 'PIRSF', 'PRODOM', 'PROSITEPROFILES',
+ 'SMART', 'SUPERFAMILY', 'TIGRFAM', 'CDD','HAMAP','HMMPANTHER',
+ 'PRINTS','SCANPROSITE','SFLD')
+ ) mapping,
+ :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.NaSequence ns
+ WHERE mapping.gene = gf.source_id
+ AND gf.na_sequence_id = ns.na_sequence_id
+ AND (gf.is_predicted != 1 OR gf.is_predicted is null)
+ GROUP BY mapping.id, mapping.gene
+:DECLARE_PARTITION;
+
+
+
+ INSERT INTO :SCHEMA.GeneId
+ (id, gene, unique_mapping, union_member, database_name, project_id, org_abbrev, modification_date)
+ WITH munge
+ AS (SELECT DISTINCT
+ regexp_replace(id, '\.\d\d?$', '') as id,
+ gene, unique_mapping, union_member, database_name
+ FROM :SCHEMA.GeneId
+ WHERE org_abbrev = ':ORG_ABBREV'
+ -- CHECK AND FIX
+ --WHERE regexp_like(id, '(.*)\.\d\d?$')
+ )
+ SELECT id, gene, 0 as unique_mapping, 'base ID' as union_member, database_name,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM munge
+ WHERE id NOT IN (SELECT id FROM :SCHEMA.GeneId where org_abbrev = ':ORG_ABBREV')
+
+ ;
+
+
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id) AS
+ SELECT lower_id
+ FROM (SELECT DISTINCT lower(id) as lower_id, gene
+ FROM :SCHEMA.GeneId where org_abbrev = ':ORG_ABBREV'
+ ) t
+ GROUP BY lower_id
+ HAVING count(*) = 1
+
+ ;
+
+
+
+ CREATE UNIQUE INDEX :ORG_ABBREV_gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id)
+
+ ;
+
+
+
+ UPDATE :SCHEMA.GeneId
+ SET unique_mapping = 1
+ WHERE id = gene
+ and org_abbrev = ':ORG_ABBREV'
+ ;
+
+
+
+ UPDATE :SCHEMA.GeneId
+ SET unique_mapping = 1
+ WHERE lower(id) IN (select lower_id from :SCHEMA.:ORG_ABBREVOneGeneIdsTmp)
+ and org_abbrev = ':ORG_ABBREV'
+ ;
+
+
+drop table :SCHEMA.:ORG_ABBREVOneGeneIdsTmp
+;
+drop table :SCHEMA.:ORG_ABBREVGeneFeatureTmp
+;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql
new file mode 100644
index 000000000..5e3f12dce
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql
@@ -0,0 +1,15 @@
+ CREATE INDEX GeneId_gene_idx ON :SCHEMA.GeneId (gene, id)
+ ;
+
+ CREATE INDEX GeneId_id_idx ON :SCHEMA.GeneId (id, gene)
+ ;
+
+ CREATE INDEX GeneId_uniqid_idx ON :SCHEMA.GeneId (unique_mapping, id, gene)
+ ;
+
+ CREATE INDEX GeneId_lowid_idx ON :SCHEMA.GeneId (lower(id), gene)
+ ;
+
+ CREATE INDEX GeneId_uniqlowid_idx ON :SCHEMA.GeneId (unique_mapping, lower(id), gene)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql
new file mode 100644
index 000000000..39837511c
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql
@@ -0,0 +1,192 @@
+drop table if exists :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp;
+
+drop table if exists :SCHEMA.:ORG_ABBREVGIJtmp;
+
+/*
+ Distinct Annotated Intron Locations
+*/
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp (
+ na_sequence_id,
+ start_min,
+ end_max,
+ is_reversed,
+ feature_type
+) AS
+ SELECT il.na_sequence_id
+ , il.start_min
+ , il.end_max
+ , il.is_reversed
+ , 'Intron' as feature_type
+ FROM apidb.IntronLocation il
+ , dots.nasequence s
+ WHERE il.na_sequence_id = s.na_sequence_id
+ AND s.taxon_id = :TAXON_ID
+ GROUP by il.na_sequence_id
+ , il.start_min
+ , il.end_max
+ , il.is_reversed
+
+ ;
+
+CREATE UNIQUE INDEX :ORG_ABBREV_annottmpnew_pk_ix ON :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp (na_sequence_id,start_min,end_max,is_reversed,feature_type)
+
+ ;
+
+/*
+ this table does the heavy lifting for gene intron junction calculations
+*/
+create UNLOGGED table :SCHEMA.:ORG_ABBREVGIJtmp (
+ NA_SEQUENCE_ID NUMERIC(10),
+ SEQUENCE_SOURCE_ID VARCHAR(100),
+ SEGMENT_START NUMERIC,
+ SEGMENT_END NUMERIC,
+ TOTAL_UNIQUE NUMERIC,
+ TOTAL_ISRPM NUMERIC,
+ IS_REVERSED NUMERIC(1),
+ INTRON_FEATURE_ID VARCHAR(200),
+ MATCHES_GENE_STRAND NUMERIC,
+ GENE_SOURCE_ID VARCHAR(100),
+ GENE_NA_FEATURE_ID NUMERIC,
+ ANNOTATED_INTRON VARCHAR(10)
+)
+ ;
+
+
+DO $$
+ DECLARE
+ iter_length numeric := 4999 :PLPGSQL_DELIM
+ i_first_pos numeric := 1 :PLPGSQL_DELIM
+ i_last_pos numeric := i_first_pos + iter_length :PLPGSQL_DELIM
+ idlist RECORD :PLPGSQL_DELIM
+ BEGIN
+ FOR idlist IN (
+ SELECT na_sequence_id
+ , source_id as sequence_source_id
+ , length
+ , taxon_id
+ , CASE WHEN step_mult > 500000 THEN 500000 ELSE step_mult END as seq_step_mult
+ FROM (
+ SELECT gs.na_sequence_id
+ , gs.source_id
+ , gs.length
+ , gs.taxon_id
+ , 25000 * (1 + floor(gs.length/count(*))) as step_mult
+ FROM apidb.intronjunction ij
+ , dots.nasequence gs
+ WHERE gs.na_sequence_id = ij.na_sequence_id
+ AND gs.taxon_id = :TAXON_ID
+ GROUP BY gs.na_sequence_id, gs.length, gs.taxon_id
+ )
+ )
+ LOOP
+ iter_length := idlist.seq_step_mult :PLPGSQL_DELIM
+ i_first_pos := 1 :PLPGSQL_DELIM
+ i_last_pos := i_first_pos + iter_length :PLPGSQL_DELIM
+ WHILE i_first_pos < idlist.length
+ LOOP
+ INSERT INTO :SCHEMA.:ORG_ABBREVGIJtmp
+ SELECT DISTINCT
+ junc.*,
+ CASE
+ WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed
+ THEN 1
+ ELSE 0
+ END as matches_gene_strand,
+ last_value(ga.source_id) over w1 as gene_source_id,
+ last_value(ga.na_feature_id) over w1 as gene_na_feature_id,
+ CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron
+ FROM (
+ SELECT ij.na_sequence_id
+ , idlist.sequence_source_id
+ , ij.segment_start
+ , ij.segment_end
+ , sum(ij.unique_reads) as total_unique
+ , round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm
+ , ij.is_reversed
+ , idlist.sequence_source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id
+ FROM apidb.intronjunction ij
+ , :SCHEMA.JunctionToCoverageProfileMapping je
+ WHERE ij.na_sequence_id = idlist.na_sequence_id
+ AND ij.segment_start between i_first_pos and i_last_pos
+ AND ij.unique_reads >= 1
+ AND je.junctions_pan_id = ij.protocol_app_node_id
+ AND je.multiplier < 20
+ AND je.org_abbrev = ':ORG_ABBREV'
+ GROUP BY ij.na_sequence_id
+ , ij.segment_start
+ , ij.segment_end
+ , ij.is_reversed
+ , idlist.sequence_source_id
+ ) junc
+ LEFT JOIN :SCHEMA.JunctionGeneLocation ga
+ ON junc.na_sequence_id = ga.na_sequence_id
+ AND ga.org_abbrev = ':ORG_ABBREV'
+ AND junc.segment_start >= ga.start_min
+ AND junc.segment_end <= ga.end_max
+ AND junc.is_reversed = ga.is_reversed
+ LEFT JOIN :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp ag
+ ON junc.na_sequence_id = ag.na_sequence_id
+ AND junc.segment_start = ag.start_min
+ AND junc.segment_end = ag.end_max
+ AND junc.is_reversed = ag.is_reversed
+ WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron')
+ WINDOW w1 AS (
+ PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type
+ ORDER BY ga.total_expression ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ :PLPGSQL_DELIM
+ commit :PLPGSQL_DELIM
+ i_first_pos := i_last_pos + 1 :PLPGSQL_DELIM
+ i_last_pos := i_first_pos + iter_length :PLPGSQL_DELIM
+ END LOOP :PLPGSQL_DELIM
+ END LOOP :PLPGSQL_DELIM
+ END :PLPGSQL_DELIM
+$$ LANGUAGE PLPGSQL :PLPGSQL_DELIM
+
+;
+
+
+create index :ORG_ABBREV_gijtmp_gnscid_ix on :SCHEMA.:ORG_ABBREVGIJtmp (gene_source_id)
+
+ ;
+
+:CREATE_AND_POPULATE
+ SELECT
+ junc.*
+ , CASE
+ WHEN maxv.gene_source_id is not null and maxv.max_isrpm > 0
+ THEN round((junc.total_isrpm / maxv.max_isrpm) * 100,2)
+ ELSE null
+ END as percent_max
+ , CASE
+ WHEN maxv.gene_source_id is not null
+ THEN 1
+ ELSE 0
+ END as contained
+ , :TAXON_ID as taxon_id
+-- , CAST (null as numeric(10)) as upstream_gene_id
+-- , CAST (null as numeric) as upstream_distance
+-- , CAST (null as numeric(10)) as downstream_gene_id
+-- , CAST (null as numeric) as downstream_distance
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ FROM
+ :SCHEMA.:ORG_ABBREVGIJtmp junc LEFT JOIN
+ (
+ SELECT gene_source_id,max(total_unique) as max_unique, max(total_isrpm) as max_isrpm
+ FROM :SCHEMA.:ORG_ABBREVGIJtmp
+ WHERE gene_source_id is not null
+ GROUP BY gene_source_id
+ ) maxv ON junc.gene_source_id = maxv.gene_source_id
+:DECLARE_PARTITION
+ ;
+
+
+
+drop table :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp
+ ;
+
+drop table :SCHEMA.:ORG_ABBREVGIJtmp
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql
new file mode 100644
index 000000000..22bd60369
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql
@@ -0,0 +1,12 @@
+create index gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
+ ;
+
+create index gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id)
+ ;
+
+create index gijnew_txnloc_ix
+ on :SCHEMA.GeneIntronJunction
+ (taxon_id, na_sequence_id, segment_start, segment_end, is_reversed,
+ total_unique, total_isrpm, annotated_intron)
+
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneLocations.psql b/Model/lib/psql/webready/orgSpecific/GeneLocations.psql
new file mode 100644
index 000000000..ad129a3f1
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneLocations.psql
@@ -0,0 +1,20 @@
+:CREATE_AND_POPULATE
+ SELECT source_id,
+ string_agg(location, '; ' order by is_top_level desc) as locations,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM (SELECT fl.feature_source_id as source_id, fl.is_top_level,
+ fl.sequence_source_id || ':'
+ || trim(to_char(fl.start_min,'999,999,999')) || '..'
+ || trim(to_char(fl.end_max,'999,999,999')) || '('
+ || CASE coalesce(fl.is_reversed, 0) WHEN 0 THEN '+' WHEN 1 THEN '-' ELSE fl.is_reversed::varchar END
+ || ')' as location
+ FROM apidb.FeatureLocation fl, dots.NaSequence ns
+ WHERE fl.feature_type = 'GeneFeature'
+ AND fl.na_sequence_id = ns.na_sequence_id
+ AND ns.taxon_id = :TAXON_ID
+ ) t
+ GROUP BY source_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneLocations_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneLocations_ix.psql
new file mode 100644
index 000000000..006a1a1df
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneLocations_ix.psql
@@ -0,0 +1,4 @@
+ create index gloc_ix
+ on :SCHEMA.GeneLocations (source_id, locations)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql
new file mode 100644
index 000000000..593fbbaee
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql
@@ -0,0 +1,69 @@
+drop table if exists :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp
+ ;
+
+create unlogged table :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp (
+ protocol_app_node_id NUMERIC(10),
+ gene_source_id VARCHAR(200),
+ max_unique NUMERIC,
+ max_isrpm NUMERIC,
+ sum_unique NUMERIC,
+ sum_isrpm NUMERIC,
+ avg_unique NUMERIC,
+ avg_isrpm NUMERIC,
+ project_id VARCHAR(20),
+ org_abbrev VARCHAR(20),
+ modification_date TIMESTAMP
+ )
+;
+
+
+DO $$
+ DECLARE
+ idlist RECORD :PLPGSQL_DELIM
+ BEGIN
+ FOR idlist IN (
+ SELECT DISTINCT na_sequence_id
+ FROM :SCHEMA.JunctionGeneLocation
+ WHERE org_abbrev = ':ORG_ABBREV'
+ )
+ LOOP
+ INSERT INTO :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp (
+ SELECT j.protocol_app_node_id
+ , ga.source_id
+ , max(unique_reads) as max_unique
+ , max(round(j.unique_reads * mult.multiplier,2)) as max_isrpm
+ , sum(unique_reads) as sum_unique
+ , sum(round(j.unique_reads * mult.multiplier,2)) as sum_isrpm
+ , avg(unique_reads) as avg_unique
+ , avg(round(j.unique_reads * mult.multiplier,2)) as avg_isrpm
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ FROM apidb.intronjunction j
+ , :SCHEMA.JunctionGeneLocation ga
+ , :SCHEMA.JunctionToCoverageProfileMapping mult
+ WHERE ga.na_sequence_id = idlist.na_sequence_id
+ AND ga.na_sequence_id = j.na_sequence_id
+ AND ga.start_min <= j.segment_start
+ AND ga.end_max >= j.segment_end
+ AND ga.is_reversed = j.is_reversed
+ AND j.protocol_app_node_id = mult.junctions_pan_id
+ AND ga.org_abbrev = ':ORG_ABBREV'
+ AND mult.org_abbrev = ':ORG_ABBREV'
+ GROUP BY j.protocol_app_node_id, ga.source_id
+ ) :PLPGSQL_DELIM
+ commit :PLPGSQL_DELIM
+ END LOOP :PLPGSQL_DELIM
+ END :PLPGSQL_DELIM
+$$ LANGUAGE PLPGSQL :PLPGSQL_DELIM
+ ;
+
+
+
+:CREATE_AND_POPULATE
+select * from :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp
+:DECLARE_PARTITION;
+
+
+drop table :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql
new file mode 100644
index 000000000..8915a4a6e
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql
@@ -0,0 +1,3 @@
+CREATE INDEX GnMxIntGIJ_ix on :SCHEMA.GeneMaxJunction (gene_source_id,protocol_app_node_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql b/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql
new file mode 100644
index 000000000..79eab5a4d
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql
@@ -0,0 +1,33 @@
+:CREATE_AND_POPULATE
+ SELECT source_id, project_id, sequence_id, gm_start,gm_end, type, is_reversed,
+ string_agg(transcript_id, ',' ORDER BY transcript_id) AS transcript_ids,
+ ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM (
+ SELECT distinct ta.source_id as transcript_id, ta.gene_source_id as source_id, ta.project_id,
+ ta.sequence_id,gm.start_min as gm_start, gm.end_max as gm_end,
+ gm.type, gl.is_reversed
+ FROM
+ apidb.FeatureLocation gl, dots.NaSequence s,
+ :SCHEMA.TranscriptAttributes ta,
+ (
+ SELECT CASE el.feature_type WHEN 'ExonFeature' THEN 'Exon' ELSE el.feature_type END as type,
+ el.parent_id as na_feature_id, el.start_min as start_min, el.end_max as end_max
+ FROM apidb.FeatureLocation el, dots.nasequence nas
+ WHERE el.feature_type in ('ExonFeature','five_prime_UTR', 'three_prime_UTR','CDS','Intron')
+ AND el.is_top_level = 1
+ AND nas.na_sequence_id = el.na_sequence_id
+ and nas.taxon_id = :TAXON_ID
+ ) gm
+ WHERE gm.na_feature_id = ta.na_feature_id
+ AND s.na_sequence_id = gl.na_sequence_id
+ AND ta.na_feature_id = gl.na_feature_id
+ AND gl.is_top_level = 1
+ AND ta.org_abbrev = ':ORG_ABBREV'
+ ) t
+ GROUP BY source_id, project_id, sequence_id,
+ gm_start, gm_end, type, is_reversed
+ ORDER BY CASE WHEN is_reversed = 1 THEN -1 * gm_start ELSE gm_start END
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql
new file mode 100644
index 000000000..2a73000f9
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql
@@ -0,0 +1,5 @@
+ create index gmd_ix
+ on :SCHEMA.GeneModelDump
+ (source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql
new file mode 100644
index 000000000..0876f0b3a
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql
@@ -0,0 +1,21 @@
+DROP TABLE if exists :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp;
+/*
+
+ATTENTION: This table is empty. We will populate it in the comparative graph
+We are creating them it so that they it is partitioned.
+*/
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp (
+ gene_id VARCHAR(80),
+ group_id VARCHAR(16),
+ project_id varchar(20),
+ org_abbrev varchar(20),
+ modification_date timestamp
+);
+
+:CREATE_AND_POPULATE
+SELECT ogt.* from :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp ogt
+:DECLARE_PARTITION;
+
+
+DROP TABLE :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql
new file mode 100644
index 000000000..c7ce2fd90
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql
@@ -0,0 +1,2 @@
+alter table :SCHEMA.GeneOrthologGroup
+ ADD CONSTRAINT GeneOrthologGroup_pk PRIMARY KEY (gene_id, group_id);
diff --git a/Model/lib/psql/webready/orgSpecific/GeneProduct.psql b/Model/lib/psql/webready/orgSpecific/GeneProduct.psql
new file mode 100644
index 000000000..ed8670eda
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneProduct.psql
@@ -0,0 +1,130 @@
+
+create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp as
+(select gf.na_feature_id
+ , gf.source_id
+ , gf.product
+ from dots.genefeature gf
+ , dots.nasequence nas
+ where gf.na_sequence_id = nas.na_sequence_id
+ and nas.taxon_id = :TAXON_ID
+)
+;
+
+
+:CREATE_AND_POPULATE
+ with gfp_preferred
+ as (select source_id,
+ substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product,
+ count(*) as value_count
+ from (select distinct gf.source_id, gfp.product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, apidb.GeneFeatureProduct gfp
+ where gfp.na_feature_id = gf.na_feature_id
+ and gfp.is_preferred = 1) tmp
+ group by source_id),
+ gfp_any
+ as (select source_id,
+ substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product,
+ count(*) as value_count
+ from (select distinct gf.source_id, gfp.product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, apidb.GeneFeatureProduct gfp
+ where gfp.na_feature_id = gf.na_feature_id) tmp
+ group by source_id),
+ tp_preferred
+ as (select source_id,
+ substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product,
+ count(*) as value_count
+ from (select distinct gf.source_id, tp.product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, dots.Transcript t, apidb.TranscriptProduct tp
+ where t.parent_id = gf.na_feature_id
+ and tp.na_feature_id = t.na_feature_id
+ and tp.is_preferred = 1) tmp
+ group by source_id),
+ gf_product
+ as (select source_id, product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf
+ where product is not null),
+ tp_any
+ as (select source_id,
+ substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product,
+ count(*) as value_count
+ from (select distinct gf.source_id, tp.product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, dots.Transcript t, apidb.TranscriptProduct tp
+ where t.parent_id = gf.na_feature_id
+ and tp.na_feature_id = t.na_feature_id) tmp
+ group by source_id),
+ t_product
+ as (select source_id,
+ substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product,
+ count(*) as value_count
+ from (select gf.source_id, t.product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, dots.Transcript t
+ where t.parent_id = gf.na_feature_id
+ and t.product is not null) tmp
+ group by source_id)
+ select gf.source_id,
+ coalesce(gfp_preferred.product, gfp_any.product, tp_preferred.product,
+ gf.product, tp_any.product, t_product.product)
+ as product,
+ case
+ when gfp_preferred.product is not null
+ then gfp_preferred.value_count
+ when gfp_any.product is not null
+ then gfp_any.value_count
+ when tp_preferred.product is not null
+ then tp_preferred.value_count
+ when gf.product is not null
+ then 1
+ when tp_any.product is not null
+ then tp_any.value_count
+ when t_product.product is not null
+ then t_product.value_count
+ else 0 -- 'unspecified product'
+ end
+ as value_count,
+ case
+ when gfp_preferred.product is not null
+ then 1
+ when gfp_any.product is not null
+ then 2
+ when tp_preferred.product is not null
+ then 3
+ when gf.product is not null
+ then 4
+ when tp_any.product is not null
+ then 5
+ when t_product.product is not null
+ then 6
+ else 7
+ end
+ as source_rule,
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf
+ LEFT JOIN gfp_preferred ON gf.source_id = gfp_preferred.source_id
+ LEFT JOIN gfp_any ON gf.source_id = gfp_any.source_id
+ LEFT JOIN tp_preferred ON gf.source_id = tp_preferred.source_id
+ LEFT JOIN gf_product ON gf.source_id = gf_product.source_id
+ LEFT JOIN tp_any ON gf.source_id = tp_any.source_id
+ LEFT JOIN t_product ON gf.source_id = t_product.source_id
+:DECLARE_PARTITION;
+
+
+/*
+
+
+Dependent Step
+ gene/protein_id -> uniprot accession and product
+
+This step will assume it has all uniprot accessions assined to the protein and uniprot product name and have pfams in our hand
+
+
+
+
+
+*/
+
+delete from :SCHEMA.GeneProduct where product is null and org_abbrev = ':ORG_ABBREV';
+
+drop table :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp
+;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql
new file mode 100644
index 000000000..910d96552
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql
@@ -0,0 +1,2 @@
+ CREATE INDEX GeneProduct_gene_idx ON :SCHEMA.GeneProduct (source_id, product)
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneSummaryFilter.psql b/Model/lib/psql/webready/orgSpecific/GeneSummaryFilter.psql
new file mode 100644
index 000000000..02e0664c2
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneSummaryFilter.psql
@@ -0,0 +1,13 @@
+:CREATE_AND_POPULATE
+ SELECT CAST(filter_name AS VARCHAR(80)) AS filter_name,':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM (SELECT species as filter_name
+ FROM :SCHEMA.GeneAttributes
+ WHERE org_abbrev = ':ORG_ABBREV'
+ UNION
+ SELECT organism as filter_name
+ FROM :SCHEMA.GeneAttributes
+ WHERE org_abbrev = ':ORG_ABBREV') t
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GeneSummaryFilter_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneSummaryFilter_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
new file mode 100644
index 000000000..e8f448107
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
@@ -0,0 +1,71 @@
+:CREATE_AND_POPULATE
+ SELECT
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ SUBSTR(sequence.source_id, 1, 60) AS source_id, sequence.a_count,
+ sequence.c_count, sequence.g_count, sequence.t_count,
+ (sequence.length
+ - (sequence.a_count + sequence.c_count + sequence.g_count + sequence.t_count))
+ AS other_count,
+ sequence.length,
+ to_char((sequence.a_count + sequence.t_count) / sequence.length * 100, '99.99')
+ AS at_percent,
+ SUBSTR(tn.name, 1, 100) AS organism,
+ taxon.ncbi_tax_id,
+ taxon.taxon_id,
+ CASE WHEN sequence.description IS NULL THEN SUBSTR(tn.name, 1, 100)
+ ELSE SUBSTR(sequence.description, 1, 400)
+ END AS sequence_description,
+ SUBSTR(genbank.genbank_accession, 1, 20) AS genbank_accession,
+ SUBSTR(db.database_version, 1, 30) AS database_version, db.database_name,
+ SUBSTR(sequence.chromosome, 1, 20) AS chromosome,
+ sequence.external_database_release_id, sequence.sequence_ontology_id,
+ sequence.chromosome_order_num, so.source_id as so_id, so.name as sequence_type,
+ 1 as is_top_level,
+ sequence.na_sequence_id, organism.genome_source,
+ organism.name_for_filenames, coalesce(msa.has_msa, 0) as has_msa
+ FROM sres.Taxon LEFT JOIN apidb.Organism ON taxon.taxon_id = :TAXON_ID and taxon.taxon_id = organism.taxon_id,
+ sres.OntologyTerm so,
+ ( SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description,
+ a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id
+ FROM dots.ExternalNaSequence
+ WHERE taxon_id = :TAXON_ID
+ ) sequence
+ LEFT JOIN
+ (SELECT drns.na_sequence_id, max(dr.primary_identifier) AS genbank_accession
+ FROM dots.dbrefNaSequence drns, sres.DbRef dr,
+ sres.ExternalDatabaseRelease gb_edr, sres.ExternalDatabase gb_ed
+ WHERE drns.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id
+ = gb_edr.external_database_release_id
+ AND gb_edr.external_database_id = gb_ed.external_database_id
+ AND gb_ed.name = 'GenBank'
+ GROUP BY drns.na_sequence_id
+ ) genbank ON sequence.na_sequence_id = genbank.na_sequence_id
+ LEFT JOIN
+ (SELECT edr.external_database_release_id,
+ edr.version AS database_version, ed.name AS database_name
+ FROM sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+ WHERE edr.external_database_id = ed.external_database_id
+ ) db ON sequence.external_database_release_id = db.external_database_release_id
+ LEFT JOIN
+ (SELECT a_na_sequence_id as na_sequence_id, 1 as has_msa
+ FROM apidb.Synteny syn
+ GROUP BY a_na_sequence_id
+ ) msa ON sequence.na_sequence_id = msa.na_sequence_id
+ LEFT JOIN
+ (SELECT taxon_id, max(name) as name
+ FROM sres.TaxonName
+ WHERE name_class = 'scientific name'
+ GROUP BY taxon_id
+ ) tn ON sequence.taxon_id = tn.taxon_id
+ WHERE
+ sequence.taxon_id = taxon.taxon_id
+ AND sequence.sequence_ontology_id = so.ontology_term_id
+ AND sequence.taxon_id = :TAXON_ID
+ AND so.name IN ('random_sequence', 'chromosome', 'contig', 'supercontig','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle')
+ ORDER BY organism, source_id
+
+
+:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql
new file mode 100644
index 000000000..de455dd7f
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql
@@ -0,0 +1,11 @@
+ create unique index pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (org_abbrev, lower(source_id), project_id)
+ ;
+
+ create unique index SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (org_abbrev, source_id)
+ ;
+
+ create unique index SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (org_abbrev, na_sequence_id)
+ ;
+
+ create unique index SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (org_abbrev, taxon_id, source_id)
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql
new file mode 100644
index 000000000..b2a85443d
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql
@@ -0,0 +1,32 @@
+:CREATE_AND_POPULATE
+ WITH org_tot AS (
+ SELECT
+ gs.organism
+ , min(gij.total_unique) as min_annot_score
+ , PERCENTILE_cont(0.005) within group (order by gij.total_unique asc) as perc005_annot_score
+ , PERCENTILE_cont(0.01) within group (order by gij.total_unique asc) as perc01_annot_score
+ , min(gij.percent_max) as min_annot_percent_max
+ , PERCENTILE_cont(0.0001) within group (order by gij.percent_max asc) as perc0001_annot_percent_max
+ , PERCENTILE_cont(0.0005) within group (order by gij.percent_max asc) as perc0005_annot_percent_max
+ , floor(max(gij.segment_end - gij.segment_start) * 1.25) as max_intron_length
+ FROM :SCHEMA.geneintronjunction gij
+ , :SCHEMA.genomicseqattributes gs
+ WHERE gs.na_sequence_id = gij.na_sequence_id
+ AND gij.annotated_intron = 'Yes'
+ AND gij.org_abbrev = ':ORG_ABBREV'
+ AND gs.org_abbrev = ':ORG_ABBREV'
+ GROUP BY gs.organism
+ )
+ SELECT gs.na_sequence_id
+ , gs.source_id
+ , ot.*
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ FROM :SCHEMA.genomicseqattributes gs
+ , org_tot ot
+ WHERE gs.organism = ot.organism
+ AND gs.na_sequence_id in (SELECT DISTINCT ij.na_sequence_id FROM apidb.intronjunction ij, dots.nasequence s where ij.na_sequence_id = s.na_sequence_id and s.taxon_id = :TAXON_ID)
+ AND gs.org_abbrev = ':ORG_ABBREV'
+:DECLARE_PARTITION
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql
new file mode 100644
index 000000000..73401d0fe
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql
@@ -0,0 +1,2 @@
+create index GeneIntJuncStat_ix on :SCHEMA.GenomicSeqJunctionStats (na_sequence_id)
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSequenceId.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceId.psql
new file mode 100644
index 000000000..489b45043
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSequenceId.psql
@@ -0,0 +1,24 @@
+:CREATE_AND_POPULATE
+ SELECT DISTINCT substr(id, 1, 60) as id, substr(sequence, 1, 60) AS sequence,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, CURRENT_TIMESTAMP as modification_date
+ FROM (
+ SELECT ns.source_id as id, ns.source_id as sequence
+ FROM dots.NaSequence ns, sres.OntologyTerm oterm
+ WHERE ns.sequence_ontology_id = oterm.ontology_term_id
+ AND ns.taxon_id = :TAXON_ID
+ AND oterm.name in ('random_sequence', 'contig', 'supercontig', 'chromosome','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle','kinetoplast')
+ UNION
+ SELECT dr.primary_identifier AS id, ns.source_id AS sequence
+ FROM dots.NaSequence ns, dots.DbRefNaSequence drnf,
+ sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+ sres.ExternalDatabase ed
+ WHERE dr.primary_identifier IS NOT NULL
+ AND ns.taxon_id = :TAXON_ID
+ AND ns.na_sequence_id = drnf.na_sequence_id
+ AND drnf.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id
+ = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ ) subquery1
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSequenceId_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceId_ix.psql
new file mode 100644
index 000000000..0cfa5601e
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSequenceId_ix.psql
@@ -0,0 +1,9 @@
+ CREATE INDEX genSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id)
+ ;
+
+ CREATE INDEX GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence)
+ ;
+
+ CREATE INDEX GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence.psql
new file mode 100644
index 000000000..bc1f31fb2
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence.psql
@@ -0,0 +1,13 @@
+:CREATE_AND_POPULATE
+ SELECT ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ sa.source_id,
+ ns.sequence
+ FROM :SCHEMA.GenomicSeqAttributes sa, dots.NaSequence ns
+ WHERE sa.na_sequence_id = ns.na_sequence_id
+ and sa.org_abbrev = ':ORG_ABBREV'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql
new file mode 100644
index 000000000..dc3711740
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql
@@ -0,0 +1,3 @@
+ create index GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql
new file mode 100644
index 000000000..db5e94653
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql
@@ -0,0 +1,57 @@
+:CREATE_AND_POPULATE
+ SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
+ ggt.taxon_id, ggt.is_not, replace(ggt.go_id, '_', ':') as go_id,
+ ggt.go_term_id, ggt.ontology,
+ replace(ggt.go_term_name, '_',' ') as go_term_name, ggt.source,
+ ggt.evidence_code,
+ CASE ggt.evidence_code WHEN 'IEA' THEN 'Computed' ELSE 'Curated' END as evidence_category,
+ ggt.reference, ggt.evidence_code_parameter,
+ ol.min_depth as depth,
+ case
+ when gs.ontology_term_id is null then 0
+ else 1
+ end as is_go_slim
+ FROM :SCHEMA.GeneGoTerms ggt
+ LEFT JOIN :SCHEMA.OntologyLevels ol ON ggt.go_term_id = ol.ontology_term_id
+ LEFT JOIN (
+ SELECT distinct ontology_term_id
+ FROM apidb.GoSubset
+ WHERE go_subset_term = 'goslim_generic'
+ ) gs ON ggt.go_term_id = gs.ontology_term_id
+ WHERE ggt.org_abbrev = ':ORG_ABBREV'
+ UNION
+ SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
+ ggt.taxon_id, ggt.is_not,
+ replace (substr(ot.source_id, 1, 25),'_',':') as go_id,
+ ot.ontology_term_id as go_term_id, ggt.ontology,
+ replace (substr(ot.name, 1,250),'_',' ') as go_term_name,
+ ggt.source, ggt.evidence_code,
+ CASE ggt.evidence_code WHEN 'IEA' THEN 'Computed' ELSE 'Curated' END as evidence_category,
+ ggt.reference,
+ ggt.evidence_code_parameter,
+ ol.min_depth as depth,
+ case
+ when gs.ontology_term_id is null then 0
+ else 1
+ end as is_go_slim
+ FROM :SCHEMA.GeneGoTerms ggt, sres.OntologyRelationship orel,
+ sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr,
+ sres.OntologyTerm ot
+ LEFT JOIN :SCHEMA.OntologyLevels ol ON ot.ontology_term_id = ol.ontology_term_id
+ LEFT JOIN (
+ SELECT distinct ontology_term_id
+ FROM apidb.GoSubset
+ WHERE go_subset_term = 'goslim_generic'
+ ) gs ON ot.ontology_term_id = gs.ontology_term_id
+ WHERE ggt.go_term_id = orel.subject_term_id
+ AND orel.object_term_id = ot.ontology_term_id
+ AND edr.external_database_release_id = ot.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND ed.name ='GO_RSRC'
+ AND ggt.org_abbrev = ':ORG_ABBREV'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql
new file mode 100644
index 000000000..2d0531bdf
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql
@@ -0,0 +1,8 @@
+ create index GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source)
+ ;
+
+ create index GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary
+ (ontology, gene_source_id, is_not, is_go_slim,
+ go_id, go_term_name, evidence_code, evidence_category)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql
new file mode 100644
index 000000000..1e8d38653
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql
@@ -0,0 +1,53 @@
+:CREATE_AND_POPULATE
+with annotatedJunctions AS (
+ select gene_source_id
+ , count(*) as annotated_count
+ from (
+ select distinct ta.gene_source_id
+ , il.na_sequence_id
+ , il.start_min
+ , il.end_max
+ , il.is_reversed
+ from apidb.intronlocation il
+ inner join :SCHEMA.transcriptattributes ta
+ ON il.parent_id = ta.na_feature_id
+ ) group by gene_source_id
+), exptJunctions as (
+ select gij.gene_source_id
+ , case when gij.total_unique >= stats.perc01_annot_score and gij.percent_max >= 2
+ then 'High'
+ else 'Low'
+ end as conf
+ FROM :SCHEMA.GeneIntronJunction gij
+ JOIN :SCHEMA.GenomicSeqJunctionStats stats
+ ON gij.na_sequence_id = stats.na_sequence_id
+ where gij.annotated_intron = 'Yes' -- we only care about the annotated ones
+ and gij.org_abbrev = ':ORG_ABBREV'
+ and stats.org_abbrev = ':ORG_ABBREV'
+), exptJunctionCounts as (
+ select gene_source_id
+ , conf
+ , count(*) as intron_count
+ from exptJunctions
+ group by gene_source_id
+ , conf
+)
+select aj.gene_source_id
+ , CASE WHEN aj.annotated_count = ejc.intron_count
+ THEN 'All-high'
+ ELSE 'Any-high'
+ END as string_value
+ from annotatedJunctions aj
+ left join exptJunctionCounts ejc on aj.gene_source_id = ejc.gene_source_id
+ where ejc.conf = 'High'
+ AND ejc.intron_count > 0
+select aj.gene_source_id
+ , CASE WHEN aj.annotated_count = sum(ejc.intron_count)
+ THEN 'All-low'
+ ELSE 'Any-low'
+ END as string_value
+ from annotatedJunctions aj
+ left join exptJunctionCounts ejc on aj.gene_source_id = ejc.gene_source_id
+group by aj.gene_source_id, aj.annotated_count
+having sum(ejc.intron_count) > 0
+:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/IntronSupportLevel_ix.psql b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/orgSpecific/IntronUtrCoords.psql b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords.psql
new file mode 100644
index 000000000..cfed91dac
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords.psql
@@ -0,0 +1,32 @@
+:CREATE_AND_POPULATE
+ SELECT na_feature_id, source_id,
+ '[' || regexp_replace(string_agg(text,',' ORDER BY start_min), '.quot;', '"' ) || ']' AS gen_rel_intron_utr_coords,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM (
+ SELECT na_feature_id, source_id, start_min,
+ '["' || feature_type || '",' || start_min || ',' || end_max || ']' AS text
+ FROM (
+ SELECT fl.feature_type, tl.na_feature_id, tl.feature_source_id AS source_id,
+ CASE
+ WHEN tl.is_reversed = 1
+ THEN tl.end_max - fl.end_max + 1
+ ELSE fl.start_min - tl.start_min + 1
+ END AS start_min,
+ CASE
+ WHEN tl.is_reversed = 1
+ THEN tl.end_max - fl.start_min + 1
+ ELSE fl.end_max - tl.start_min + 1
+ END AS end_max
+ FROM
+ apidb.TranscriptLocation tl, apidb.FeatureLocation fl, dots.nasequence nas
+ WHERE
+ tl.na_feature_id = fl.parent_id
+ AND fl.na_sequence_id = nas.na_sequence_id
+ AND nas.taxon_id = :TAXON_ID
+ AND fl.feature_type in('UTR', 'Intron')
+ AND tl.is_top_level = 1
+ AND fl.is_top_level = 1
+ ) t1
+ ) t2
+ GROUP BY na_feature_id, source_id
+:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql
new file mode 100644
index 000000000..78bff1d74
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql
@@ -0,0 +1,8 @@
+ CREATE INDEX iuc_srcid_ix
+ ON :SCHEMA.IntronUtrCoords (source_id, na_feature_id)
+ ;
+
+ CREATE INDEX iuc_nfid_ix
+ ON :SCHEMA.IntronUtrCoords (na_feature_id, source_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql
new file mode 100644
index 000000000..35b6c6318
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql
@@ -0,0 +1,23 @@
+/*
+ The tuning table this was based on used a loop over the nasequences. We can add that back if slow but I am:
+ 1. using geneattributes which will make this faster than joining genefeature to nalocation and grouping
+ 2. using input tables here that are already partitioned
+*/
+:CREATE_AND_POPULATE
+SELECT ga.na_sequence_id
+ , ga.start_min
+ , ga.is_reversed
+ , ga.end_max
+ , ga.na_feature_id
+ , ga.source_id,
+ round(sum(nafe.value)::NUMERIC,2) as total_expression
+ FROM :SCHEMA.GeneAttributes ga
+ , :SCHEMA.JunctionToCoverageProfileMapping je
+ , results.nafeatureexpression nafe
+ WHERE ga.na_feature_id = nafe.na_feature_id
+ AND nafe.protocol_app_node_id = je.exp_pan_id
+ AND ga.org_abbrev = ':ORG_ABBREV'
+ AND je.org_abbrev = ':ORG_ABBREV'
+ GROUP BY ga.na_sequence_id,l.start_min,l.is_reversed,l.end_max,ga.na_feature_id,ga.source_id
+:DECLARE_PARTITION
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql
new file mode 100644
index 000000000..a2d30c4cb
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql
@@ -0,0 +1,5 @@
+create index gnidloc_nafid_ix on :SCHEMA.JunctionGeneLocation (na_feature_id)
+ ;
+
+CREATE INDEX gnattidloc_pk_ix ON :SCHEMA.JunctionGeneLocation (na_sequence_id,start_min,is_reversed,end_max,na_feature_id,source_id,total_expression)
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql
new file mode 100644
index 000000000..462937437
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql
@@ -0,0 +1,143 @@
+drop table if exists :SCHEMA.:ORG_ABBREVJunctionExpressionTmp;
+
+drop table if exists :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp;
+
+
+/*
+ Join junctions to Expression values and get multiplier and decide on strand switching
+*/
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVJunctionExpressionTmp AS
+ WITH stats AS (
+ SELECT protocol_app_node_id
+ --, 'total' as type
+ --, count(*) as total_junctions
+ --, sum(unique_reads) as total_reads
+ , round(1000000/sum(unique_reads),4) as multiplier
+ FROM apidb.IntronJunction ij, dots.nasequence s
+ WHERE unique_reads >= 1
+ AND ij.na_sequence_id = s.na_sequence_id
+ AND s.taxon_id = 1802185
+ GROUP BY protocol_app_node_id
+ ), ij AS (
+ SELECT pj.output_pan_id as junctions_pan_id
+ , p.output_pan_id as expression_pan_id
+ , avg(nafe.value) as avg_value,pan.name as exp_name
+ , regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name
+ FROM :SCHEMA.panio p
+ , :SCHEMA:panio pj
+ , results.nafeatureexpression nafe
+ , study.protocolappnode pan
+ WHERE pj.output_pan_id in (select distinct protocol_app_node_id from stats)
+ AND pj.input_pan_id = p.input_pan_id
+ AND p.output_pan_id = pan.protocol_app_node_id
+ AND pan.name like '%tpm - unique%' -- NOTE: probably better to use raw counts here??
+ AND p.output_pan_id = nafe.protocol_app_node_id
+ AND p.org_abbrev = ':ORG_ABBREV'
+ AND pj.org_abbrev = ':ORG_ABBREV'
+ GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
+ ) , part AS (
+ SELECT
+ ij.junctions_pan_id
+ , ij.avg_value
+ , stats.multiplier
+ , max(ij.expression_pan_id) OVER w as max_exp_pan_id
+ , max(ij.sample_name) OVER w as max_sample_Name
+ , max(ij.exp_name) OVER w as max_exp_name
+ FROM ij, stats
+ WHERE ij.junctions_pan_id = stats.protocol_app_node_id
+ WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
+ )
+ SELECT DISTINCT junctions_pan_id
+ , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
+ , first_value(max_sample_name) OVER w1 as sample_name
+ , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
+ , multiplier
+ FROM part
+ WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
+ ;
+
+create index :ORG_ABBREVjunexpgijtmp_ix on :SCHEMA.:ORG_ABBREVJunctionExpressionTmp(junctions_pan_id,exp_pan_id)
+ ;
+
+
+/*
+ Grab general mapping stats
+*/
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp (
+ junctions_pan_id,
+ read_length,
+ mapped_reads,
+ avg_mapping_coverage,
+ num_replicates
+) AS
+ SELECT junctions_pan_id
+ , round(avg(average_read_length - 2),1) as read_length
+ , round(avg(number_mapped_reads),1) as mapped_reads
+ , round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) as avg_mapping_coverage
+ , count(*) as num_replicates
+ FROM (SELECT je.junctions_pan_id
+ , ca.value::NUMERIC as average_read_length
+ , cb.value::NUMERIC as number_mapped_reads
+ , cc.value::NUMERIC as avg_mapping_coverage
+ FROM :SCHEMA.:ORG_ABBREVJunctionExpressionTmp je
+ , :SCHEMA.PANIO ioa
+ , STUDY.CHARACTERISTIC ca
+ , STUDY.CHARACTERISTIC cb
+ , sres.ontologyterm ota
+ , sres.ontologyterm otb,
+ STUDY.CHARACTERISTIC cc, sres.ontologyterm otc
+ WHERE je.junctions_pan_id = ioa.output_pan_id
+ AND ioa.input_pan_id = ca.protocol_app_node_id
+ AND ca.value is not null
+ AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID
+ AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length'
+ AND ca.protocol_app_node_id = cb.protocol_app_node_id
+ AND cb.value is not null
+ AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID
+ AND otb.source_id IN ('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads'
+ AND ca.protocol_app_node_id = cc.protocol_app_node_id
+ AND cc.value is not null
+ AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID
+ AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage'
+ AND ioa.org_abbrev = ':ORG_ABBREV'
+ ) t
+ GROUP by junctions_pan_id
+;
+
+
+CREATE INDEX :ORG_ABBREVmpstats_pk_ix on :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp
+ (junctions_pan_id,read_length,mapped_reads,avg_mapping_coverage,num_replicates)
+;
+
+
+
+/*
+ This is the main table
+*/
+:CREATE_AND_POPULATE
+ SELECT DISTINCT je.sample_name
+ , je.junctions_pan_id
+ , je.exp_pan_id
+ , ms.read_length
+ , ms.mapped_reads
+ , ms.avg_mapping_coverage
+ , ms.num_replicates
+ , je.switch_strands
+ , je.multiplier
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+
+ FROM junexpgijtmp je
+ , mappingstatsgijtmp ms
+ WHERE je.junctions_pan_id = ms.junctions_pan_id
+:DECLARE_PARTITION
+ ;
+
+
+drop table :SCHEMA.:ORG_ABBREVJunctionExpressionTmp
+;
+
+drop table :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp
+;
diff --git a/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql
new file mode 100644
index 000000000..c724f94d6
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql
@@ -0,0 +1,2 @@
+create index JunctionCovMap_ix on :SCHEMA.JunctionToCoverageProfileMapping(junctions_pan_id,exp_pan_id)
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/OrganismAbbreviation.psql b/Model/lib/psql/webready/orgSpecific/OrganismAbbreviation.psql
new file mode 100644
index 000000000..bcdedad21
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/OrganismAbbreviation.psql
@@ -0,0 +1,10 @@
+:CREATE_AND_POPULATE
+ select tn.name as organism, o.name_for_filenames,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, CURRENT_TIMESTAMP as modification_date
+ from apidb.Organism o, sres.TaxonName tn
+ where o.taxon_id = tn.taxon_id
+ and tn.name_class = 'scientific name'
+ and o.taxon_id = :TAXON_ID
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/OrganismAbbreviation_ix.psql b/Model/lib/psql/webready/orgSpecific/OrganismAbbreviation_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank.psql b/Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank.psql
new file mode 100644
index 000000000..34c531b5c
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank.psql
@@ -0,0 +1,49 @@
+:CREATE_AND_POPULATE
+ WITH organism_rank AS (
+ SELECT tn1.name as organism, o.public_abbrev, tn2.name as parent_organism,
+ case when tn2.name = 'Oomycetes' then 'class' else r.rank end as rank
+ FROM (
+ WITH RECURSIVE cte AS(
+ SELECT taxon_id input, taxon_id, rank, parent_id
+ FROM sres.taxon
+ WHERE taxon_id IN (SELECT taxon_id FROM apidb.organism WHERE taxon_id = :TAXON_ID and is_annotated_genome = 1)
+ UNION
+ SELECT cte.input, t.taxon_id, t.rank, t.parent_id
+ FROM sres.taxon t, cte
+ WHERE cte.parent_id = t.taxon_id
+ )
+ SELECT input, taxon_id, rank
+ FROM cte
+ ) r
+ , sres.taxonname tn1
+ , sres.taxonname tn2
+ , apidb.organism o
+ WHERE r.input = tn1.taxon_id
+ AND r.taxon_id = tn2.taxon_id
+ AND tn1.name_class = 'scientific name'
+ AND tn2.name_class = 'scientific name'
+ AND (r.rank in ('phylum', 'genus', 'species', 'kingdom', 'class') or (r.rank = 'no rank' and tn2.name = 'Oomycetes'))
+ AND tn1.taxon_id = o.taxon_id
+ )
+ SELECT
+ ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ , organisms.organism
+ , organisms.public_abbrev
+ , coalesce(phylum.parent_organism, 'N/A') as phylum
+ , coalesce(genus.parent_organism, 'N/A') as genus
+ , coalesce(species.parent_organism, 'N/A') as species
+ , coalesce(kingdom.parent_organism, 'N/A') as kingdom
+ , coalesce(class.parent_organism, 'N/A') as class
+ FROM
+ (select distinct organism, public_abbrev from organism_rank) organisms
+ LEFT JOIN ( select * from organism_rank where rank= 'phylum') phylum ON organisms.organism = phylum.organism
+ LEFT JOIN ( select * from organism_rank where rank= 'genus') genus ON organisms.organism = genus.organism
+ LEFT JOIN ( select * from organism_rank where rank= 'species') species ON organisms.organism = species.organism
+ LEFT JOIN ( select * from organism_rank where rank= 'kingdom') kingdom ON organisms.organism = kingdom.organism
+ LEFT JOIN ( select * from organism_rank where rank= 'class') class ON organisms.organism = class.organism
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank_ix.psql b/Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/orgSpecific/PANExtDbRls.psql b/Model/lib/psql/webready/orgSpecific/PANExtDbRls.psql
new file mode 100644
index 000000000..524ef017c
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/PANExtDbRls.psql
@@ -0,0 +1,42 @@
+:CREATE_AND_POPULATE
+
+ SELECT distinct protocol_app_node_id as pan_id, external_database_release_id, name as dataset_name,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM (
+ SELECT
+ sl.protocol_app_node_id
+ , s.external_database_release_id
+ , d.name
+ FROM
+ study.nodeSet s
+ , study.nodeNodeSet sl
+ , sres.externaldatabaserelease r
+ , sres.externaldatabase d
+ , apidb.datasource ds
+ WHERE
+ s.external_database_release_id = r.external_database_release_id
+ and r.external_database_id = d.external_database_id
+ and s.node_set_id = sl.node_set_id
+ and s.external_database_release_id is not null
+ and d.name = ds.external_database_name
+ and r.version = ds.version
+ and ds.taxon_id = :TAXON_ID
+ UNION
+ SELECT pan.protocol_app_node_id
+ , pan.external_database_release_id
+ , d.name
+ FROM study.protocolappnode pan
+ , sres.externaldatabaserelease r
+ , sres.externaldatabase d
+ , apidb.datasource ds
+ WHERE
+ pan.external_database_release_id = r.external_database_release_id
+ and r.external_database_id = d.external_database_id
+ and pan.external_database_release_id is not null
+ and d.name = ds.external_database_name
+ and r.version = ds.version
+ and ds.taxon_id = :TAXON_ID
+ ) t
+ ORDER BY external_database_release_id, protocol_app_node_id
+
+:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql
new file mode 100644
index 000000000..b9b918779
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/PANIO.psql
@@ -0,0 +1,24 @@
+:CREATE_AND_POPULATE
+ SELECT DISTINCT io.*, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM (
+ SELECT i.protocol_app_node_id input_pan_id, pa.protocol_app_id,
+ o.protocol_app_node_id output_pan_id,
+ in_type.source_id as input_pan_type_source_id,
+ --in_type.name as input_pan_type,
+ in_type.ontology_term_id as input_pan_type_id,
+ out_type.source_id as output_pan_type_source_id,
+ --out_type.name as output_pan_type,
+ out_type.ontology_term_id as output_pan_type_id
+ FROM :SCHEMA.panextdbrls panExtDbRls
+ INNER JOIN study.Input i on i.protocol_app_node_id = panExtDbRls.pan_id
+ INNER JOIN study.ProtocolApp pa on i.protocol_app_id = pa.protocol_app_id
+ INNER JOIN study.Output o on o.protocol_app_id = pa.protocol_app_id
+ INNER JOIN study.ProtocolAppNode in_pan on i.protocol_app_node_id = in_pan.protocol_app_node_id
+ INNER JOIN study.ProtocolAppNode out_pan on o.protocol_app_node_id = out_pan.protocol_app_node_id
+ LEFT JOIN sres.OntologyTerm out_type ON out_pan.type_id = out_type.ontology_term_id
+ LEFT JOIN sres.OntologyTerm in_type ON in_pan.type_id = in_type.ontology_term_id
+ WHERE panExtDbRls.org_abbrev = ':ORG_ABBREV'
+ ) io
+ ORDER BY io.input_pan_id, io.output_pan_id
+
+:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql b/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql
new file mode 100644
index 000000000..f809a0b72
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql
@@ -0,0 +1,29 @@
+ create index painio2_iix on :SCHEMA.PANIO
+ (input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
+
+
+ ;
+
+
+
+ create index painio2_oix on :SCHEMA.PANIO
+ (output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
+
+
+ ;
+
+
+
+ create index painio2_otypeix on :SCHEMA.PANIO
+ (output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id)
+
+
+ ;
+
+
+
+ create index painio2_itypeix on :SCHEMA.PANIO
+ (input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id)
+
+
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
new file mode 100644
index 000000000..fbac42a0d
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
@@ -0,0 +1,28 @@
+DROP TABLE IF EXISTS :SCHEMA.ORG_ABBREVPathwaysGeneTableTmp;
+
+
+/* ATTENTION: This table is empty. We will populate it in the comparative graph as loading depends on TranscriptPathway.
+We are creating it here so that it is partitioned */
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp (
+ gene_source_id VARCHAR(80),
+ pathway_source_id VARCHAR(50),
+ pathway_name VARCHAR(150),
+ reactions NUMERIC(8),
+ enzyme VARCHAR(20),
+ expasy_url TEXT,
+ pathway_source TEXT,
+ exact_match VARCHAR(8),
+
+ --for partitioning
+ project_id VARCHAR(20),
+ org_abbrev VARCHAR(20),
+ modification_date timestamp
+);
+
+
+:CREATE_AND_POPULATE
+SELECT pgt.* FROM :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp pgt
+:DECLARE_PARTITION;
+
+DROP TABLE :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp;
diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql
new file mode 100644
index 000000000..f69349df0
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql
@@ -0,0 +1,6 @@
+ create index pgt_ix on :SCHEMA.PathwaysGeneTable
+ (gene_source_id, project_id, pathway_source_id, pathway_name,
+ reactions, enzyme, expasy_url, pathway_source, exact_match)
+
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/PdbSimilarity.psql b/Model/lib/psql/webready/orgSpecific/PdbSimilarity.psql
new file mode 100644
index 000000000..b3de1cfe9
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/PdbSimilarity.psql
@@ -0,0 +1,33 @@
+:CREATE_AND_POPULATE
+ SELECT ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ ta.source_id, eas.source_id AS pdb_chain,
+ substr(eas.description, 1, 100) AS pdb_title,
+ substr(eas.source_id
+ , 1
+ , length(eas.source_id) - (
+ CASE strpos(reverse(eas.source_id), '_')
+ WHEN 0 THEN length(eas.source_id)
+ ELSE strpos(reverse(eas.source_id), '_') END
+ )
+ ) AS pdb_id,
+ s.evalue_mant, s.evalue_exp,
+ s.pident as percent_identity,
+ ROUND((s.length / ta.protein_length) * 100) AS percent_plasmo_coverage,
+ SUBSTR(tn.name, 1, 100) AS taxon,
+ eas.taxon_id as pdb_taxon_id, ta.taxon_id as gene_taxon_id
+ FROM apidb.PdbSimilarity s,
+ apiDB.ProteinDataBank eas,
+ sres.TaxonName tn,
+ :SCHEMA.TranscriptAttributes ta
+ WHERE ta.aa_sequence_id = s.aa_sequence_id
+ AND s.pident = eas.protein_data_bank_id
+ and tn.name_class = 'scientific name'
+ AND eas.taxon_id = tn.taxon_id
+ and ta.org_abbrev = ':ORG_ABBREV'
+ ORDER BY ta.source_id, eas.source_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/PdbSimilarity_ix.psql b/Model/lib/psql/webready/orgSpecific/PdbSimilarity_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
new file mode 100644
index 000000000..14be4666d
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
@@ -0,0 +1,220 @@
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVGoTermList_tmp;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGoTermList_tmp AS
+ SELECT aa_sequence_id, ontology, source,
+ string_agg(go_term_name, ';' ORDER BY go_term_name) AS go_terms,
+ string_agg(go_id, ';' ORDER BY go_term_name) AS go_ids
+ FROM (
+ SELECT aa_sequence_id, ontology,
+ CASE evidence_code WHEN 'IEA' THEN 'predicted' ELSE 'annotated' END AS source, go_term_name, go_id
+ FROM :SCHEMA.GeneGoTerms
+ WHERE org_abbrev = ':ORG_ABBREV'
+
+ ) t
+ GROUP BY aa_sequence_id, ontology, source
+
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp AS
+ SELECT DISTINCT gts.aa_sequence_id,
+ substr(annotated_go_component.go_terms, 1, 300) AS annotated_go_component,
+ substr(annotated_go_function.go_terms, 1, 300) AS annotated_go_function,
+ substr(annotated_go_process.go_terms, 1, 300) AS annotated_go_process,
+ substr(predicted_go_component.go_terms, 1, 300) AS predicted_go_component,
+ substr(predicted_go_function.go_terms, 1, 300) AS predicted_go_function,
+ substr(predicted_go_process.go_terms, 1, 300) AS predicted_go_process,
+ substr(annotated_go_component.go_ids, 1, 300) AS annotated_go_id_component,
+ substr(annotated_go_function.go_ids, 1, 300) AS annotated_go_id_function,
+ substr(annotated_go_process.go_ids, 1, 300) AS annotated_go_id_process,
+ substr(predicted_go_component.go_ids, 1, 300) AS predicted_go_id_component,
+ substr(predicted_go_function.go_ids, 1, 300) AS predicted_go_id_function,
+ substr(predicted_go_process.go_ids, 1, 300) AS predicted_go_id_process
+ FROM
+ (SELECT DISTINCT aa_sequence_id FROM :SCHEMA.GoTermSummary where org_abbrev = ':ORG_ABBREV') gts
+ LEFT JOIN (
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
+ WHERE source = 'annotated' AND ontology = 'Cellular Component'
+ ) annotated_go_component ON
+ gts.aa_sequence_id = annotated_go_component.aa_sequence_id
+ AND 'annotated' = annotated_go_component.source
+ AND 'Cellular Component' = annotated_go_component.ontology
+ LEFT JOIN (
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
+ WHERE source = 'annotated' AND ontology = 'Molecular Function'
+ ) annotated_go_function ON
+ gts.aa_sequence_id = annotated_go_function.aa_sequence_id
+ AND 'annotated' = annotated_go_function.source
+ AND 'Molecular Function' = annotated_go_function.ontology
+ LEFT JOIN (
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
+ WHERE source = 'annotated' AND ontology = 'Biological Process'
+ ) annotated_go_process ON
+ gts.aa_sequence_id = annotated_go_process.aa_sequence_id
+ AND 'annotated' = annotated_go_process.source
+ AND 'Biological Process' = annotated_go_process.ontology
+ LEFT JOIN (
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
+ WHERE source = 'predicted' AND ontology = 'Cellular Component'
+ ) predicted_go_component ON
+ gts.aa_sequence_id = predicted_go_component.aa_sequence_id
+ AND 'predicted' = predicted_go_component.source
+ AND 'Cellular Component' = predicted_go_component.ontology
+ LEFT JOIN (
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
+ WHERE source = 'predicted' AND ontology = 'Molecular Function'
+ ) predicted_go_function ON
+ gts.aa_sequence_id = predicted_go_function.aa_sequence_id
+ AND 'predicted' = predicted_go_function.source
+ AND 'Molecular Function' = predicted_go_function.ontology
+ LEFT JOIN (
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
+ WHERE source = 'predicted' AND ontology = 'Biological Process'
+ ) predicted_go_process ON
+ gts.aa_sequence_id = predicted_go_process.aa_sequence_id
+ AND 'predicted' = predicted_go_process.source
+ AND 'Biological Process' = predicted_go_process.ontology
+
+ ;
+
+
+
+ create index ProteinGoAttr_aaSequenceId_:ORG_ABBREV ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id)
+
+
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp AS
+ SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers
+ FROM (SELECT DISTINCT asec.aa_sequence_id,
+ ec.ec_number || ' (' || ec.description || ')' AS ec_number
+ FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec, dots.aasequence seq
+ WHERE ec.enzyme_class_id = asec.enzyme_class_id
+ AND seq.aa_sequence_id = asec.aa_sequence_id
+ AND seq.taxon_id = :TAXON_ID
+ AND NOT asec.evidence_code = 'OrthoMCLDerived'
+ ) t
+ GROUP BY aa_sequence_id
+
+ ;
+
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp;
+
+ --TODO: these rows will not exist in org specific land
+ -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp AS
+ -- SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived
+ -- FROM (SELECT DISTINCT asec.aa_sequence_id,
+ -- ec.ec_number || ' (' || ec.description || ')' AS ec_number
+ -- FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec, dots.aasequence seq
+ -- WHERE ec.enzyme_class_id = asec.enzyme_class_id
+ -- AND seq.aa_sequence_id = asec.aa_sequence_id
+ -- AND seq.taxon_id = :TAXON_ID
+ -- AND asec.evidence_code = 'OrthoMCLDerived'
+ -- ) t
+ -- GROUP BY aa_sequence_id
+
+ -- ;
+
+
+-- TODO: Filter the subqueries or break into tmp tables for performance
+:CREATE_AND_POPULATE
+ SELECT ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ tas.source_id, tas.aa_sequence_id,
+ t.source_id as transcript_source_id,
+ gf.source_id as gene_source_id,
+ cdsl.na_sequence_id as na_sequence_id,
+ cdsl.is_reversed,
+ cdsl.start_min as cds_start,
+ cdsl.end_max as cds_end,
+ (taf.translation_stop - taf.translation_start) + 1 AS cds_length,
+ length(tas.sequence) AS protein_length,
+ coalesce(transmembrane.tm_domains, 0) AS tm_count,
+ tas.molecular_weight,
+ asa.min_molecular_weight, asa.max_molecular_weight,
+ asa.isoelectric_point, asa.hydropathicity_gravy_score,
+ asa.aromaticity_score,
+ SUBSTR(sigp.peptide_sequence, 1, 200) as signalp_peptide,
+ ec_numbers,
+ --ec_numbers_derived,
+ go.annotated_go_component,
+ go.annotated_go_function,
+ go.annotated_go_process,
+ go.predicted_go_component,
+ go.predicted_go_function,
+ go.predicted_go_process,
+ go.annotated_go_id_component,
+ go.annotated_go_id_function,
+ go.annotated_go_id_process,
+ go.predicted_go_id_component,
+ go.predicted_go_id_function,
+ go.predicted_go_id_process,
+ SUBSTR(coalesce(rt1.anticodon, rt2.anticodon), 1, 3) AS anticodon,
+ 0 AS has_seqedit,
+ row_number() over (partition by t.source_id order by tas.length desc) as rank_in_transcript,
+ uniprot.uniprot_ids
+ FROM
+ dots.Transcript t
+ INNER JOIN dots.GeneFeature gf ON gf.na_feature_id = t.parent_id
+ INNER JOIN dots.nasequence nas ON gf.na_sequence_id = nas.na_sequence_id AND nas.taxon_id = :TAXON_ID
+ INNER JOIN dots.TranslatedAaFeature taf ON t.na_feature_id = taf.na_feature_id
+ INNER JOIN dots.TranslatedAaSequence tas ON taf.aa_sequence_id = tas.aa_sequence_id
+ LEFT JOIN dots.RnaType rt2 ON gf.na_feature_id = rt2.parent_id
+ LEFT JOIN dots.RnaType rt1 ON t.na_feature_id = rt1.parent_id
+ LEFT JOIN apidb.AaSequenceAttribute asa ON taf.aa_sequence_id = asa.aa_sequence_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp go ON tas.aa_sequence_id = go.aa_sequence_id
+ LEFT JOIN (
+ SELECT aa_sequence_id, string_agg(peptide_sequence, ', ') peptide_sequence
+ FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :SCHEMA.SignalPeptideDomains where org_abbrev = ':ORG_ABBREV') t
+ GROUP BY aa_sequence_id
+ ) sigp ON tas.aa_sequence_id = sigp.aa_sequence_id
+ LEFT JOIN (
+ SELECT protein_source_id, na_sequence_id, is_reversed,
+ MIN(start_min) AS start_min, MAX(end_max) AS end_max
+ FROM apidb.CdsLocation WHERE is_top_level=1
+ GROUP BY protein_source_id, na_sequence_id, is_reversed
+ ) cdsl ON tas.source_id = cdsl.protein_source_id
+ LEFT JOIN (
+ SELECT aa_sequence_id, max(tm_domains) AS tm_domains
+ FROM (SELECT tmaf.aa_sequence_id, COUNT(*) AS tm_domains
+ FROM dots.TransmembraneAaFeature tmaf, dots.AaLocation al
+ WHERE tmaf.aa_feature_id = al.aa_feature_id
+ GROUP BY tmaf.aa_sequence_id) tms
+ GROUP BY tms.aa_sequence_id
+ ) transmembrane ON tas.aa_sequence_id = transmembrane.aa_sequence_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp ec ON tas.aa_sequence_id = ec.aa_sequence_id
+ --LEFT JOIN :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id
+ LEFT JOIN (
+ SELECT af.aa_sequence_id,
+ string_agg(dbref.primary_identifier, ',' order by dbref.primary_identifier) as uniprot_ids
+ FROM sres.ExternalDatabase d, sres.ExternalDatabaseRelease r,
+ sres.DbRef, dots.DbRefAaFeature daf, dots.AaFeature af
+ WHERE d.name like 'Uniprot%'
+ AND d.external_database_id = r.external_database_id
+ AND r.external_database_release_id = dbref.external_database_release_id
+ AND dbref.db_ref_id = daf.db_ref_id
+ AND daf.aa_feature_id = af.aa_feature_id
+ GROUP BY af.aa_sequence_id
+ ) uniprot ON tas.aa_sequence_id = uniprot.aa_sequence_id
+ ORDER BY tas.source_id
+
+
+:DECLARE_PARTITION;
+
+
+
+ update :SCHEMA.ProteinAttributes gaup
+ set has_seqedit = 1
+ where source_id in (select source_id from apidb.seqedit)
+ and org_abbrev = ':ORG_ABBREV'
+
+ ;
+
+drop table :SCHEMA.:ORG_ABBREVGoTermList_tmp;
+drop table :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp;
+drop table :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp;
+--drop table :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp;
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes_ix.psql
new file mode 100644
index 000000000..3abf03219
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes_ix.psql
@@ -0,0 +1,6 @@
+ CREATE INDEX PA_sourceId ON :SCHEMA.ProteinAttributes (source_id)
+ ;
+
+ CREATE INDEX PA_aaSequenceId ON :SCHEMA.ProteinAttributes (aa_sequence_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql b/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql
new file mode 100644
index 000000000..0d2f58218
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql
@@ -0,0 +1,9 @@
+:CREATE_AND_POPULATE
+ WITH pAttr AS (
+ SELECT distinct source_id, aa_sequence_id
+ FROM :SCHEMA.ProteinAttributes where org_abbrev = ':ORG_ABBREV')
+ SELECT pa.source_id, tas.sequence,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM pAttr pa, dots.TranslatedAaSequence tas
+ WHERE pa.aa_sequence_id = tas.aa_sequence_id
+:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql
new file mode 100644
index 000000000..c91f81d84
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql
@@ -0,0 +1,3 @@
+ create index ProtSeq_ix on :SCHEMA.:ORG_ABBREVProteinSequence (source_id, project_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/RnaSeqStats.psql b/Model/lib/psql/webready/orgSpecific/RnaSeqStats.psql
new file mode 100644
index 000000000..8945fa2c9
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/RnaSeqStats.psql
@@ -0,0 +1,56 @@
+:CREATE_AND_POPULATE
+ select study_id, study_name, dataset_name, taxon_id, round(avg(num_reads::integer),0) as avg_unique_reads,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ from (select sl.node_set_id as study_id
+ , s.name || '[' || s.node_type || ']' as study_name
+ , ed.name as dataset_name
+ , ds.taxon_id
+ , c.value as num_reads
+ from apidb.Datasource ds
+ , sres.ExternalDatabase ed
+ , sres.ExternalDatabaseRelease edr
+ , study.nodeSet s, study.nodeNodeSet sl
+ , study.ProtocolAppNode pan
+ , study.Characteristic c
+ , sres.OntologyTerm ot
+ where ds.external_database_name = ed.name
+ and ed.external_database_id = edr.external_database_id
+ and edr.external_database_release_id = s.external_database_release_id
+ and sl.node_set_id = s.node_set_id
+ and sl.protocol_app_node_id = pan.protocol_app_node_id
+ and pan.protocol_app_node_id = c.protocol_app_node_id
+ and c.qualifier_id = ot.ontology_term_id
+ and (ot.source_id = 'EUPATH_0000460' or ot.source_id = 'EuPathUserDefined_00507')
+ and ds.taxon_id = :TAXON_ID
+ ) subquery1
+ group by study_id, study_name, dataset_name, taxon_id
+ union
+ select study_id, study_name, dataset_name, taxon_id, round(2*avg(num_reads::integer),0) as avg_unique_reads,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ from (select sl.node_set_id as study_id
+ , s.name || '[' || s.node_type || ']' as study_name
+ , ed.name as dataset_name
+ , ds.taxon_id
+ , c.value as num_reads
+ from apidb.Datasource ds
+ , sres.ExternalDatabase ed
+ , sres.ExternalDatabaseRelease edr
+ , study.nodeSet s, study.nodeNodeSet sl
+ , study.ProtocolAppNode pan
+ , study.Characteristic c
+ , sres.OntologyTerm ot
+ where ds.external_database_name = ed.name
+ and ed.external_database_id = edr.external_database_id
+ and edr.external_database_release_id = s.external_database_release_id
+ and sl.node_set_id = s.node_set_id
+ and sl.protocol_app_node_id = pan.protocol_app_node_id
+ and pan.protocol_app_node_id = c.protocol_app_node_id
+ and c.qualifier_id = ot.ontology_term_id
+ and (ot.source_id = 'EUPATH_0000468' or ot.source_id = 'EuPathUserDefined_00515' or ot.source_id = 'EUPATH_0000476' or ot.source_id = 'EuPathUserDefined_00523')
+ and ds.taxon_id = :TAXON_ID
+ ) subquery2
+ group by study_id, study_name, dataset_name, taxon_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/RnaSeqStats_ix.psql b/Model/lib/psql/webready/orgSpecific/RnaSeqStats_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql b/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql
new file mode 100644
index 000000000..e5b60d5e5
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql
@@ -0,0 +1,47 @@
+
+ CREATE TABLE :ORG_ABBREVSequencePieceClosure AS
+ SELECT sp.sequence_piece_id,
+ sp.virtual_na_sequence_id,
+ sp.piece_na_sequence_id,
+ sp.sequence_order,
+ sp.distance_from_left,
+ sp.uncertainty,
+ sp.strand_orientation,
+ sp.start_position,
+ sp.end_position,
+ sp.modification_date,
+ 1 AS edge_level
+ FROM dots.SequencePiece sp, dots.NaSequence ns
+ WHERE sp.piece_na_sequence_id = ns.na_sequence_id
+ AND (ns.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0)
+
+ ;
+
+
+
+ /* known issue: this should be run not just once, but iteratively
+ until it doesn't create new records. Currently (7/2008),
+ SequencePieces aren't nested even once. */
+ INSERT INTO :ORG_ABBREVSequencePieceClosure
+ (edge_level, virtual_na_sequence_id, piece_na_sequence_id,
+ distance_from_left, strand_orientation, modification_date,
+ start_position, end_position, sequence_order, sequence_piece_id)
+ SELECT 2, higher.virtual_na_sequence_id, lower.piece_na_sequence_id,
+ higher.distance_from_left,
+ case
+ when coalesce(higher.strand_orientation, '+') = coalesce(lower.strand_orientation, '+')
+ then '+'
+ else '-'
+ end as strand_orientation,
+ now(),
+ higher.start_position - lower.distance_from_left,
+ higher.end_position - lower.distance_from_left,
+ higher.sequence_order,
+ nextval('dots.sequencepiece_sq')
+ FROM :ORG_ABBREVSequencePieceClosure higher, :ORG_ABBREVSequencePieceClosure lower
+ WHERE higher.piece_na_sequence_id = lower.virtual_na_sequence_id
+ AND higher.start_position >= lower.start_position + lower.distance_from_left
+ AND higher.end_position <= lower.end_position + lower.distance_from_left
+
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/SignalPeptideDomains.psql b/Model/lib/psql/webready/orgSpecific/SignalPeptideDomains.psql
new file mode 100644
index 000000000..a23b7ea12
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/SignalPeptideDomains.psql
@@ -0,0 +1,42 @@
+:CREATE_AND_POPULATE
+ SELECT
+ ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ , gf.source_id gene_source_id
+ , t.source_id transcript_source_id
+ , taf.na_feature_id
+ , spf.aa_feature_id
+ , spf.aa_sequence_id
+ , spf.parent_id
+ , aal.start_min
+ , aal.end_max
+ , spf.algorithm_name
+ , substr(s.sequence, aal.end_max::INTEGER, 1) peptide_sequence
+ FROM
+ dots.SignalPeptideFeature spf
+ , dots.AaLocation aal
+ , dots.TranslatedAaFeature taf
+ , dots.TranslatedAaSequence tas
+ , dots.GeneFeature gf
+ , dots.AaSequence s
+ , dots.Transcript t
+ WHERE
+ spf.aa_sequence_id = s.aa_sequence_id
+ AND aal.aa_feature_id = spf.aa_feature_id
+ AND t.na_feature_id = taf.na_feature_id
+ AND taf.aa_sequence_id = tas.aa_sequence_id
+ AND tas.aa_sequence_id = spf.aa_sequence_id
+ AND gf.na_feature_id = t.parent_id
+ AND tas.taxon_id = :TAXON_ID
+ AND (spf.signal_probability >= .5
+ OR spf.signal_probability IS NULL
+ OR ((spf.means_score + spf.maxy_score) / 2) >= .5
+ OR ( spf.maxy_conclusion + spf.maxc_conclusion + spf.maxs_conclusion + spf.means_conclusion ) >= 3
+ )
+ ORDER BY
+ spf.aa_sequence_id, spf.aa_feature_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/SignalPeptideDomains_ix.psql b/Model/lib/psql/webready/orgSpecific/SignalPeptideDomains_ix.psql
new file mode 100644
index 000000000..404e82cc0
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/SignalPeptideDomains_ix.psql
@@ -0,0 +1,8 @@
+ CREATE INDEX SignalP1_ix
+ ON :SCHEMA.SignalPeptideDomains (aa_sequence_id)
+ ;
+
+ CREATE INDEX SignalP2_ix
+ ON :SCHEMA.SignalPeptideDomains (gene_source_id, transcript_source_id, end_max)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql b/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql
new file mode 100644
index 000000000..4c50716bf
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql
@@ -0,0 +1,33 @@
+drop table if exists :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp
+
+/*
+ATTENTION: This script is used to make an empty table. It will be populated with a perl script.
+
+We make the table here in the usual way so we get partitioned webready table
+*/
+
+
+create unlogged table :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp (
+ location numeric(10),
+ type varchar(20),
+ na_sequence_id numeric(10),
+ is_unique numeric(1),
+ sum_cpm float8,
+ dist_to_first_atg numeric(10),
+ gene_source_id varchar(80),
+ transcript_source_id varchar(80),
+ dist_to_cds numeric(10),
+ is_dominant numeric(1),
+ strand char(1),
+ project_id varchar(20),
+ org_abbrev varchar(20),
+ modification_date timestamp
+ );
+
+
+:CREATE_AND_POPULATE
+select * from :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp
+:DECLARE_PARTITION;
+
+drop table :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp
+;
diff --git a/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript_ix.psql b/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
new file mode 100644
index 000000000..52fa02bb4
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
@@ -0,0 +1,46 @@
+:CREATE_AND_POPULATE
+ SELECT DISTINCT
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ ga.source_id as gene_source_id,
+ ga.organism as organism,
+ ga.genus_species as species,
+ aef.source_id as probe_id,
+ aef.na_feature_id as tfbs_na_feature_id,
+ CASE
+ WHEN ga.is_reversed = 0
+ THEN round(abs(ga.start_min - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)),0)
+ ELSE round(abs(ga.end_max - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)),0)
+ END as distance,
+ CASE
+ WHEN /* distance > 0 */
+ CASE WHEN ga.is_reversed = 0
+ THEN ga.start_min - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)
+ ELSE ga.end_max - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)
+ END > 0
+ THEN
+ CASE
+ WHEN ga.is_reversed = 0
+ THEN '-'
+ ELSE '+'
+ END
+ ELSE
+ CASE
+ WHEN ga.is_reversed = 1
+ THEN '-'
+ ELSE '+'
+ END
+ END as direction
+ -- , aef.*
+ FROM dots.BindingSiteFeature aef,
+ apidb.FeatureLocation arrloc,
+ :SCHEMA.GeneAttributes ga
+ WHERE aef.na_feature_id = arrloc.na_feature_id
+ AND arrloc.na_sequence_id = ga.na_sequence_id
+ AND ga.org_abbrev = ':ORG_ABBREV'
+ AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000)
+ or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) )
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/TFBSGene_ix.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene_ix.psql
new file mode 100644
index 000000000..7eee68e1d
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TFBSGene_ix.psql
@@ -0,0 +1,6 @@
+ create index tfbs_geneid_idx ON :SCHEMA.TFBSGene (gene_source_id, tfbs_na_feature_id)
+ ;
+
+ create index geneid_tfbs_idx ON :SCHEMA.TFBSGene (tfbs_na_feature_id,gene_source_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql b/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql
new file mode 100644
index 000000000..22bf73943
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql
@@ -0,0 +1,43 @@
+drop table if exists :SCHEMA.taxonOfInterest_:ORG_ABBREV;
+
+create unlogged table :SCHEMA.taxonOfInterest_:ORG_ABBREV as
+select distinct ens.taxon_id
+from dots.est e,apidb.datasource ds, apidb.organism o,
+ sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens,
+ sres.externaldatabase ed, sres.ontologyterm oterm
+WhERE e.na_sequence_id = ens.na_sequence_id
+ AND ens.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND ens.sequence_ontology_id = oterm.ontology_term_id
+ AND ed.name = ds.name
+ and ds.taxon_id = o.taxon_id
+ and o.is_reference_strain = 1
+ and o.taxon_id = :TAXON_ID
+ union
+ select :TAXON_ID
+;
+
+-- recursively walk taxon tree to find ancestor with rank "species"
+-- Update this to select max/min level with rank species if there are multiple
+:CREATE_AND_POPULATE
+ WITH RECURSIVE cte AS (
+ SELECT TAXON_ID, taxon_id as parent_id, 1 as lvl
+ FROM sres.taxon
+ WHERE taxon_id IN (SELECT taxon_id from :SCHEMA.taxonOfInterest_:ORG_ABBREV)
+ UNION ALL
+ SELECT cte.taxon_id, sub.parent_id, lvl + 1
+ FROM cte, sres.taxon sub
+ WHERE cte.parent_id = sub.taxon_id
+ )
+ SELECT c.taxon_id, c.parent_id as species_taxon_id,
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date
+ FROM cte c, sres.taxon t
+ WHERE t.taxon_id = c.parent_id
+ AND t.rank='species'
+:DECLARE_PARTITION;
+
+drop table if exists :SCHEMA.taxonOfInterest_:ORG_ABBREV;
+
+
diff --git a/Model/lib/psql/webready/orgSpecific/TaxonSpecies_ix.psql b/Model/lib/psql/webready/orgSpecific/TaxonSpecies_ix.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
new file mode 100644
index 000000000..45422830b
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
@@ -0,0 +1,30 @@
+DROP TABLE IF EXISTS :SCHEMA.TaxonTree_:ORG_ABBREV;
+
+CREATE TABLE :SCHEMA.TaxonTree_:ORG_ABBREV as
+ SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id,
+ cast(tn.name as varchar(80)) as name,
+ cast(t.rank as varchar(24)) as rank
+ FROM sres.Taxon t, sres.TaxonName tn
+ WHERE t.taxon_id = tn.taxon_id
+ AND tn.name_class = 'scientific name'
+ AND t.taxon_id = :TAXON_ID
+ ;
+
+:CREATE_AND_POPULATE
+ WITH RECURSIVE cte AS (
+ SELECT tt.*, name as organism, ARRAY[taxon_id::numeric] as path
+ FROM :SCHEMA.TaxonTree_:ORG_ABBREV tt
+ WHERE taxon_id = :TAXON_ID
+ UNION
+ SELECT tt.*, cte.organism, cte.path || tt.taxon_id as path
+ FROM :SCHEMA.TaxonTree_:ORG_ABBREV tt, cte
+ WHERE cte.parent_id = tt.taxon_id
+ AND tt.name != 'root'
+ )
+ SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over() as orderNum,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
+ FROM (SELECT cte.* FROM cte ORDER BY path) t
+:DECLARE_PARTITION;
+
+
+DROP TABLE :SCHEMA.TaxonTree_:ORG_ABBREV;
diff --git a/Model/lib/psql/webready/orgSpecific/Taxonomy_ix.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy_ix.psql
new file mode 100644
index 000000000..e7005f831
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/Taxonomy_ix.psql
@@ -0,0 +1,5 @@
+ create index tax_ix
+ on :SCHEMA.Taxonomy
+ (organism, ordernum, taxon_id, parent_id, ncbi_tax_id, name, rank)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
new file mode 100644
index 000000000..c6ca34e89
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
@@ -0,0 +1,455 @@
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp;
+
+ CREATE UNLOGGED table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp AS
+ select na_feature_id,
+ substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id,
+ substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal
+ FROM (SELECT DISTINCT t.na_feature_id, dr.primary_identifier as uniprot_id
+ FROM sres.DbRef dr, dots.DbRefNaFeature x, dots.Transcript t,
+ dots.genefeature gf, dots.nasequence nas,
+ sres.ExternalDatabase d, sres.ExternalDatabaseRelease r
+ WHERE dr.db_ref_id = x.DB_REF_ID
+ AND t.parent_id = gf.na_feature_id
+ and gf.na_sequence_id = nas.na_sequence_id
+ and nas.taxon_id = :TAXON_ID
+ AND (x.na_feature_id = t.na_feature_id --
+ or x.na_feature_id = t.parent_id)
+ AND dr.external_database_release_id = r.external_database_release_id
+ AND r.external_database_id = d.external_database_id
+ AND (d.name like '%uniprot_dbxref_RSRC'
+ OR d.name like '%dbxref_gene2Uniprot_RSRC'
+ OR d.name = 'Links to Uniprot Genes'
+ OR d.name like '%_dbxref_uniprot_linkout_RSRC'
+ OR d.name like '%_dbxref_uniprot_from_annotation_RSRC')
+ ) t
+ GROUP BY na_feature_id
+
+ ;
+
+:CREATE_AND_POPULATE
+ WITH genefeat AS (
+ SELECT DISTINCT
+ -- first the gene attributes:
+ gf.source_id AS gene_source_id,
+ gf.na_feature_id AS gene_na_feature_id,
+ LEAST(nl.start_min, nl.end_max) AS gene_start_min,
+ GREATEST(nl.start_min, nl.end_max) AS gene_end_max,
+ COALESCE(preferred_name.name, any_name.name) AS gene_name,
+ cast(coalesce(preferred_gene_product.product, any_gene_product.product, gf.product)
+ as VARCHAR(300)) as old_gene_product,
+ COALESCE(gp.product, 'unspecified product') as gene_product,
+ REPLACE(so.name, '_', ' ') AS gene_type,
+ gf.name as gene_ebi_biotype,
+ gi.gene_id,
+ transcripts.gene_transcript_count,
+ exons.gene_exon_count,
+ olds.old_ids AS gene_previous_ids,
+ coalesce(deprecated.is_deprecated, 0) as is_deprecated,
+ GREATEST(1, least(nl.start_min, nl.end_max) - 15000) AS gene_context_start,
+ LEAST(gsa.length, greatest(nl.start_min, nl.end_max) + 15000) AS gene_context_end,
+ GREATEST(1, least(nl.start_min, nl.end_max) - 1500) AS gene_zoom_context_start,
+ LEAST(gsa.length, greatest(nl.start_min, nl.end_max) + 1500) AS gene_zoom_context_end,
+ CAST(orthologs.name AS VARCHAR(60)) AS orthomcl_name,
+ -- coalesce(tothtssnps.total_hts_snps,0) AS gene_total_hts_snps,
+ -- coalesce(tothtssnps.hts_nonsynonymous_snps,0) AS gene_hts_nonsynonymous_snps,
+ -- coalesce(tothtssnps.hts_stop_codon_snps,0) AS gene_hts_stop_codon_snps,
+ -- coalesce(tothtssnps.hts_noncoding_snps,0) AS gene_hts_noncoding_snps,
+ -- coalesce(tothtssnps.hts_synonymous_snps,0) AS gene_hts_synonymous_snps,
+ -- coalesce(tothtssnps.hts_nonsyn_syn_ratio,0) AS gene_hts_nonsyn_syn_ratio,
+ CAST(cmnt.comment_string AS VARCHAR(300)) AS comment_string,
+ entrez_table.entrez_id AS gene_entrez_id,
+ gloc.locations AS gene_locations,
+ CAST(gsa.source_id AS VARCHAR(50)) AS sequence_id,
+ CAST(SUBSTR(tn.name, 1, 80) AS VARCHAR(80)) AS organism,
+ CAST(species_name.name AS VARCHAR(60)) AS species,
+ LTRIM(REGEXP_REPLACE(tn.name, replace (replace (species_name.name,'[',''), ']','') ,'')) AS strain,
+ taxon.ncbi_tax_id, tn.taxon_id,
+ so.source_id as so_id,
+ CAST(so.name AS VARCHAR(150)) AS so_term_name,
+ CAST(SUBSTR(so.definition, 1, 150) AS VARCHAR(150)) AS so_term_definition,
+ CAST(soRls.version AS VARCHAR(7)) AS so_version,
+ rt2.anticodon rt2_anticodon,
+ ed.name AS external_db_name,
+ edr.version AS external_db_version,
+ edr.external_database_release_id AS external_db_rls_id,
+ CAST(gsa.chromosome AS VARCHAR(20)) AS chromosome,
+ gsa.sequence_type,
+ gsa.chromosome_order_num, gsa.na_sequence_id
+ FROM dots.GeneFeature gf
+ INNER JOIN dots.nasequence seq ON seq.na_sequence_id = gf.na_sequence_id and seq.taxon_id = :TAXON_ID
+ INNER JOIN apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id
+ INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id
+ INNER JOIN :SCHEMA.GeneLocations gloc ON gf.source_id = gloc.source_id and gloc.org_abbrev = ':ORG_ABBREV'
+ LEFT JOIN :SCHEMA.GeneProduct gp ON gf.source_id = gp.source_id and gp.org_abbrev = ':ORG_ABBREV'
+ INNER JOIN sres.ExternalDatabaseRelease edr ON gf.external_database_release_id = edr.external_database_release_id
+ INNER JOIN sres.ExternalDatabase ed ON edr.external_database_id = ed.external_database_id
+ INNER JOIN :SCHEMA.GenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id and gsa.org_abbrev = ':ORG_ABBREV'
+ INNER JOIN sres.TaxonName tn ON gsa.taxon_id = tn.taxon_id
+ INNER JOIN sres.Taxon ON gsa.taxon_id = taxon.taxon_id
+ INNER JOIN sres.externalDatabaseRelease soRls ON so.external_database_release_id = soRls.external_database_release_id
+ INNER JOIN (
+ SELECT DISTINCT gene AS source_id FROM :SCHEMA.GeneId where org_abbrev = ':ORG_ABBREV'
+ ) gene ON gf.source_id = gene.source_id
+ LEFT JOIN dots.RnaType rt2 ON gf.na_feature_id = rt2.parent_id
+ LEFT JOIN :SCHEMA.TaxonSpecies ts ON gsa.taxon_id = ts.taxon_id and ts.org_abbrev = ':ORG_ABBREV'
+ LEFT JOIN dots.geneinstance gi ON gf.na_feature_id = gi.na_feature_id
+ INNER JOIN sres.TaxonName species_name ON ts.species_taxon_id = species_name.taxon_id
+ LEFT JOIN (
+ SELECT parent_id, count(*) AS gene_transcript_count
+ FROM dots.Transcript
+ GROUP BY parent_id
+ ) transcripts ON gf.na_feature_id = transcripts.parent_id
+ LEFT JOIN (
+ SELECT parent_id, count(*) AS gene_exon_count
+ FROM dots.ExonFeature
+ GROUP BY parent_id
+ ) exons ON gf.na_feature_id = exons.parent_id
+ LEFT JOIN (
+ SELECT nfc.na_feature_id, MAX(SUBSTR(nfc.comment_string, 300, 1)) AS comment_string
+ FROM dots.NaFeatureComment nfc
+ GROUP BY nfc.na_feature_id
+ ) cmnt ON gf.na_feature_id = cmnt.na_feature_id
+ LEFT JOIN (
+ SELECT distinct drnf.na_feature_id as gene_na_feature_id, 1 as is_deprecated
+ FROM dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+ WHERE drnf.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND ed.name = 'gassAWB_dbxref_gene2Deprecated_RSRC'
+ ) deprecated ON gf.na_feature_id = deprecated.gene_na_feature_id
+ -- NEED NEW TUNING TABLE ONCE NGS SNP WORK IS DONE
+ -- LEFT JOIN (
+ -- SELECT gene_source_id, total_hts_snps, hts_nonsynonymous_snps, hts_stop_codon_snps,hts_noncoding_snps,hts_synonymous_snps,
+ -- case when (hts_nonsynonymous_snps is null) then 0
+ -- when (hts_synonymous_snps = 0) then 0
+ -- else round ((hts_nonsynonymous_snps/ hts_synonymous_snps), 2) end as hts_nonsyn_syn_ratio
+ -- FROM (
+ -- select gene_source_id,
+ -- count(*) as total_hts_snps,
+ -- sum(has_nonsynonymous_allele) as hts_nonsynonymous_snps,
+ -- sum(has_stop_codon) as hts_stop_codon_snps,
+ -- sum(is_noncoding_snp) as hts_noncoding_snps,
+ -- count(*) - sum(has_nonsynonymous_allele) - sum(has_stop_codon) - sum(is_noncoding_snp) as hts_synonymous_snps
+ -- FROM :ORG_ABBREVSnpAttributes
+ -- WHERE gene_source_id is not null
+ -- GROUP by gene_source_id
+ -- ) t
+ -- ) tothtssnps ON gf.source_id = tothtssnps.gene_source_id
+ LEFT JOIN (
+ SELECT ssg.sequence_id as gene_na_feature_id, sg.name
+ FROM dots.SequenceSequenceGroup ssg,
+ dots.sequencegroup sg, core.tableinfo ti
+ WHERE ssg.sequence_group_id = sg.sequence_group_id
+ AND ssg.source_table_id = ti.table_id
+ AND ti.name = 'GeneFeature'
+ ) orthologs ON gf.na_feature_id = orthologs.gene_na_feature_id
+ LEFT JOIN (
+ SELECT na_feature_id, max(product) as product
+ FROM apidb.GeneFeatureProduct
+ WHERE is_preferred = 1
+ GROUP BY na_feature_id
+ ) preferred_gene_product ON gf.na_feature_id = preferred_gene_product.na_feature_id
+ LEFT JOIN (
+ SELECT na_feature_id, max(product) as product
+ FROM apidb.GeneFeatureProduct
+ GROUP BY na_feature_id
+ ) any_gene_product ON gf.na_feature_id = any_gene_product.na_feature_id
+ LEFT JOIN (
+ SELECT na_feature_id, max(name) as name
+ FROM apidb.GeneFeatureName
+ WHERE is_preferred = 1
+ GROUP BY na_feature_id
+ EXCEPT
+ -- suppress gene/name associations from the *DELETED_RSRC databases
+ SELECT gfn.na_feature_id, gfn.name
+ FROM apidb.GeneFeatureName gfn,
+ sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+ WHERE gfn.external_database_release_id = edr.external_database_release_id
+ AND ed.external_database_id = edr.external_database_id
+ AND ed.name like '%DELETED_RSRC'
+ ) preferred_name ON gf.na_feature_id = preferred_name.na_feature_id
+ LEFT JOIN (
+ SELECT na_feature_id, max(name) as name
+ FROM apidb.GeneFeatureName
+ GROUP by na_feature_id
+ EXCEPT
+ -- suppress gene/name associations from the *DELETED_RSRC databases
+ SELECT gfn.na_feature_id, gfn.name
+ FROM apidb.GeneFeatureName gfn,
+ sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+ WHERE gfn.external_database_release_id = edr.external_database_release_id
+ AND ed.external_database_id = edr.external_database_id
+ AND ed.name like '%DELETED_RSRC'
+ ) any_name ON gf.na_feature_id = any_name.na_feature_id
+ LEFT JOIN (
+ SELECT dbna.na_feature_id,
+ substr(string_agg(db.primary_identifier, ',' order by db.primary_identifier), 1, 300) as entrez_id
+ FROM sres.ExternalDatabaseRelease edr, sres.DbRef db,
+ dots.DbRefNaFeature dbna, sres.ExternalDatabase ed
+ WHERE edr.external_database_release_id = db.external_database_release_id
+ AND ed.external_database_id = edr.external_database_id
+ AND dbna.db_ref_id = db.db_ref_id
+ AND lower(ed.name) like '%entrez%'
+ GROUP BY dbna.na_feature_id
+ ) entrez_table ON gf.na_feature_id = entrez_table.na_feature_id
+ LEFT JOIN (
+ -- TODO: PERFORMANCE
+ SELECT drnf.na_feature_id,
+ substr(string_agg(dr.primary_identifier, ';' order by dr.primary_identifier), 1, 900) as old_ids
+ FROM dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+ WHERE dr.primary_identifier is not null
+ AND drnf.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND edr.id_type = 'previous id'
+ GROUP BY drnf.na_feature_id
+ ) olds ON gf.na_feature_id = olds.na_feature_id
+ WHERE nl.is_top_level = 1
+ AND nl.feature_type = 'GeneFeature'
+ AND gsa.taxon_id = :TAXON_ID
+ AND species_name.name_class = 'scientific name'
+ AND (gf.is_predicted != 1 OR gf.is_predicted is null)
+ AND tn.name_class = 'scientific name'
+ AND tn.taxon_id NOT IN (SELECT o.taxon_id FROM apidb.Organism o WHERE o.is_annotated_genome=0)
+ -- JB: NOT SURE WHY THERE ARE COMMENTED. (if needed, move to model)
+ --AND tn.name not in ('Plasmodium gallinaceum','Plasmodium reichenowi')
+ ), transcript AS (
+ SELECT DISTINCT
+ t.parent_id as transcript_parent_id,
+ -- first the gene attributes:
+ cast(null as varchar(80)) as representative_transcript,
+ 0 as gene_paralog_number, 0 as gene_ortholog_number,
+ transcript_uniprot.uniprot_id, transcript_uniprot.uniprot_id_internal,
+ -- next the transcript attributes:
+ t.source_id AS transcript_source_id,
+ tso.name as transcript_type,
+ t.na_feature_id,
+ CAST(coalesce(preferred_tx_product.product, any_tx_product.product, t.product)
+ AS VARCHAR(300))
+ AS transcript_product,
+ tl.start_min, tl.end_max,
+ tl.is_reversed, --CHECK if needed
+ CASE coalesce(tl.is_reversed, 0) WHEN 0 THEN 'forward' WHEN 1 THEN 'reverse' ELSE tl.is_reversed::varchar END AS strand,
+ CASE t.is_pseudo WHEN null THEN 0 ELSE t.is_pseudo END as is_pseudo,
+ transcript_exons.exon_count,
+ sns.length AS length, sns.na_sequence_id as spliced_na_sequence_id,
+ rt1.anticodon rt1_anticodon,
+ utr_lengths.five_prime_utr_length,
+ utr_lengths.three_prime_utr_length
+ FROM dots.Transcript t
+ INNER JOIN dots.genefeature gf on t.parent_id = gf.na_feature_id
+ INNER JOIN dots.nasequence nas on gf.na_sequence_id = nas.na_sequence_id and nas.taxon_id = :TAXON_ID
+ LEFT JOIN dots.RnaType rt1 ON t.na_feature_id = rt1.parent_id
+ LEFT JOIN dots.SplicedNaSequence sns ON t.na_sequence_id = sns.na_sequence_id
+ INNER JOIN sres.OntologyTerm tso ON t.sequence_ontology_id = tso.ontology_term_id
+ INNER JOIN apidb.TranscriptLocation tl ON tl.feature_source_id = t.source_id
+ INNER JOIN (
+ SELECT rna_feature_id, count(*) as exon_count
+ FROM dots.RnaFeatureExon
+ GROUP BY rna_feature_id
+ ) transcript_exons ON t.na_feature_id = transcript_exons.rna_feature_id
+ INNER JOIN (
+ SELECT t.na_feature_id as transcript_na_feature_id, five_prime.utr_length as five_prime_utr_length, three_prime.utr_length as three_prime_utr_length
+ FROM dots.transcript t
+ LEFT JOIN (
+ SELECT parent_id, sum(end_max-start_min + 1) as utr_length FROM apidb.UtrLocation
+ WHERE direction = 5 AND is_top_level = 1
+ GROUP BY parent_id
+ ) five_prime ON t.na_feature_id = five_prime.parent_id
+ LEFT JOIN (
+ SELECT parent_id, sum(end_max-start_min + 1) as utr_length FROM apidb.UtrLocation
+ WHERE direction = 3 AND is_top_level = 1
+ GROUP BY parent_id
+ ) three_prime ON t.na_feature_id = three_prime.parent_id
+ ) utr_lengths ON t.na_feature_id = utr_lengths.transcript_na_feature_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp transcript_uniprot ON t.na_feature_id = transcript_uniprot.na_feature_id
+ LEFT JOIN (
+ SELECT na_feature_id, max(product) as product
+ FROM apidb.TranscriptProduct
+ WHERE is_preferred = 1
+ GROUP BY na_feature_id
+ ) preferred_tx_product ON t.na_feature_id = preferred_tx_product.na_feature_id
+ LEFT JOIN (
+ SELECT na_feature_id, max(product) as product
+ FROM apidb.TranscriptProduct
+ GROUP BY na_feature_id
+ ) any_tx_product ON t.na_feature_id = any_tx_product.na_feature_id
+ WHERE
+ tl.is_top_level=1
+ )
+ SELECT DISTINCT
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ transcript.transcript_source_id AS source_id,
+ -- first the gene attributes:
+ genefeat.gene_source_id,
+ genefeat.gene_na_feature_id,
+ genefeat.gene_start_min,
+ genefeat.gene_end_max,
+ genefeat.gene_name,
+ CAST(COALESCE(genefeat.old_gene_product, transcript.transcript_product,
+ case when transcript.is_pseudo = 1
+ then 'pseudogene'
+ else 'unspecified product'
+ end
+ ) as VARCHAR(300)
+ ) as old_gene_product,
+-- genefeat.gene_product,
+ genefeat.gene_type,
+ genefeat.gene_ebi_biotype,
+ genefeat.gene_id,
+ genefeat.gene_transcript_count,
+ genefeat.gene_exon_count,
+ cast(null as varchar(80)) as representative_transcript,
+ genefeat.gene_previous_ids,
+ genefeat.is_deprecated,
+ 0 as gene_paralog_number, 0 as gene_ortholog_number,
+ genefeat.gene_context_start,
+ genefeat.gene_context_end,
+ genefeat.gene_zoom_context_start,
+ genefeat.gene_zoom_context_end,
+ genefeat.orthomcl_name,
+ -- genefeat.gene_total_hts_snps,
+ -- genefeat.gene_hts_nonsynonymous_snps,
+ -- genefeat.gene_hts_stop_codon_snps,
+ -- genefeat.gene_hts_noncoding_snps,
+ -- genefeat.gene_hts_synonymous_snps,
+ -- genefeat.gene_hts_nonsyn_syn_ratio,
+ genefeat.comment_string,
+ transcript.uniprot_id, transcript.uniprot_id_internal,
+ genefeat.gene_entrez_id,
+ genefeat.gene_locations,
+ -- next the transcript attributes:
+ transcript.transcript_source_id,
+ transcript.transcript_type,
+ transcript.na_feature_id,
+ CAST(coalesce(transcript.transcript_product, genefeat.old_gene_product,
+ case when transcript.is_pseudo = 1
+ then 'pseudogene'
+ else 'unspecified product'
+ end
+ )AS VARCHAR(300)
+ ) as transcript_product,
+ transcript.start_min, transcript.end_max,
+ transcript.is_reversed, --CHECK if needed
+ transcript.strand,
+ transcript.is_pseudo,
+ transcript.exon_count,
+ transcript.length,
+ transcript.spliced_na_sequence_id,
+ genefeat.sequence_id,
+ genefeat.organism,
+ genefeat.species,
+ genefeat.strain,
+ genefeat.ncbi_tax_id, genefeat.taxon_id,
+ genefeat.so_id,
+ genefeat.so_term_name,
+ genefeat.so_term_definition,
+ genefeat.so_version,
+ CAST(coalesce(rt1_anticodon, rt2_anticodon)AS VARCHAR(3)) AS anticodon,
+ genefeat.external_db_name,
+ genefeat.external_db_version,
+ genefeat.external_db_rls_id,
+ genefeat.chromosome,
+ genefeat.sequence_type,
+ genefeat.chromosome_order_num, genefeat.na_sequence_id,
+ --next the protein attributes:
+ pa.source_id AS protein_source_id,
+ pa.aa_sequence_id,
+ pa.cds_start as coding_start,
+ pa.cds_end as coding_end,
+ pa.cds_length,
+ pa.protein_length,
+ pa.has_seqedit,
+ pa.tm_count,
+ pa.molecular_weight,
+ pa.isoelectric_point,
+ pa.signalp_peptide,
+ pa.ec_numbers,
+ --pa.ec_numbers_derived,
+ pa.annotated_go_component,
+ pa.annotated_go_function,
+ pa.annotated_go_process,
+ pa.predicted_go_component,
+ pa.predicted_go_function,
+ pa.predicted_go_process,
+ pa.annotated_go_id_component,
+ pa.annotated_go_id_function,
+ pa.annotated_go_id_process,
+ pa.predicted_go_id_component,
+ pa.predicted_go_id_function,
+ pa.predicted_go_id_process,
+ transcript.five_prime_utr_length,
+ transcript.three_prime_utr_length
+ FROM genefeat
+ INNER JOIN transcript ON genefeat. gene_na_feature_id = transcript.transcript_parent_id
+ LEFT JOIN :SCHEMA.proteinattributes pa ON pa.org_abbrev = ':ORG_ABBREV' and transcript.transcript_source_id = pa.transcript_source_id AND pa.rank_in_transcript = 1
+ ORDER BY taxon_id, source_id
+
+
+:DECLARE_PARTITION;
+
+
+ -- ADDED ORG_ABBREV filter in 2 places just in case
+ UPDATE :SCHEMA.TranscriptAttributes ta
+ SET exon_count = (SELECT count(*) + 1 FROM apidb.IntronLocation il WHERE ta.org_abbrev = ':ORG_ABBREV' AND il.parent_id = ta.na_feature_id AND il.end_max - il.start_min + 1 > 10 )
+ WHERE ta.project_id = 'TriTrypDB'
+ AND ta.org_abbrev = ':ORG_ABBREV'
+
+ ;
+
+
+
+ UPDATE :SCHEMA.TranscriptAttributes gaup
+ SET gene_paralog_number = (
+ SELECT count(distinct gene_source_id)
+ FROM :SCHEMA.TranscriptAttributes g1
+ WHERE g1.orthomcl_name = gaup.orthomcl_name
+ AND g1.organism = gaup.organism
+ AND gaup.gene_source_id != g1.gene_source_id
+ AND g1.org_abbrev = ':ORG_ABBREV'
+ ),
+ gene_ortholog_number = (
+ SELECT count(distinct gene_source_id)
+ FROM :SCHEMA.TranscriptAttributes g1
+ WHERE g1.orthomcl_name = gaup.orthomcl_name
+ AND g1.organism != gaup.organism
+ AND g1.org_abbrev = ':ORG_ABBREV'
+ )
+ WHERE (gaup.gene_type = 'protein coding' or gaup.gene_type = 'protein coding gene')
+ and gaup.org_abbrev = ':ORG_ABBREV'
+
+ ;
+
+
+-- TODO: test this
+ UPDATE :SCHEMA.TranscriptAttributes
+ SET gene_id = gene_na_feature_id + (select nextval('dots.gene_sq'))
+ WHERE gene_id is null
+ and org_abbrev = ':ORG_ABBREV'
+
+ ;
+
+
+ UPDATE :SCHEMA.TranscriptAttributes
+ SET representative_transcript = (
+ select min(source_id)
+ from :SCHEMA.TranscriptAttributes ga
+ where ga.gene_source_id = :SCHEMA.TranscriptAttributes.gene_source_id
+ and org_abbrev = ':ORG_ABBREV'
+ )
+ WHERE representative_transcript is null
+ AND gene_id is not null
+ and org_abbrev = ':ORG_ABBREV'
+ ;
+
+
+ UPDATE :SCHEMA.TranscriptAttributes
+ SET representative_transcript = source_id
+ WHERE representative_transcript is null
+ and org_abbrev = ':ORG_ABBREV'
+ ;
+
+drop table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp
+;
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql
new file mode 100644
index 000000000..d0eee715f
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql
@@ -0,0 +1,71 @@
+ CREATE UNIQUE INDEX TranscriptAttr_sourceId
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, source_id)
+ ;
+
+ CREATE UNIQUE INDEX TranscriptAttr_srcPrj
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, source_id, gene_source_id, project_id)
+ ;
+
+ CREATE UNIQUE INDEX TranscriptAttr_genesrc
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_source_id, source_id, project_id)
+ ;
+
+ CREATE UNIQUE INDEX TranscriptAttr_exon_ix
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_exon_count, source_id, gene_source_id, project_id)
+ ;
+
+ CREATE UNIQUE INDEX TranscriptAttr_loc_ix
+ ON :SCHEMA.TranscriptAttributes
+ (org_abbrev, na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id,
+ is_deprecated, source_id, gene_source_id, project_id)
+ ;
+
+ CREATE UNIQUE INDEX TranscriptAttr_feat_ix
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, na_feature_id, source_id, gene_source_id, project_id)
+ ;
+
+ CREATE UNIQUE INDEX TranscriptAttr_geneid_ix
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_id, source_id, gene_source_id, project_id)
+ ;
+
+ CREATE UNIQUE INDEX TransAttr_orthoname_ix
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id)
+ ;
+
+ CREATE UNIQUE INDEX TransAttr_molwt_ix
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, taxon_id, molecular_weight, source_id, gene_source_id, project_id)
+ ;
+
+ CREATE INDEX TransAttr_ortholog_ix
+ ON :SCHEMA.TranscriptAttributes
+ (source_id, na_sequence_id, gene_start_min, gene_end_max, orthomcl_name, gene_source_id, project_id)
+ ;
+
+ CREATE INDEX TransAttr_orgsrc_ix
+ ON :SCHEMA.TranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max)
+ ;
+
+ CREATE INDEX TransAttr_lwrsrc_ix
+ ON :SCHEMA.TranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id)
+ ;
+
+ CREATE INDEX TransAttr_species_ix
+ ON :SCHEMA.TranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id)
+ ;
+
+ CREATE UNIQUE INDEX TrnscrptAttr_geneinfo
+ ON :SCHEMA.TranscriptAttributes
+ (org_abbrev, gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id,
+ protein_source_id, na_sequence_id, length, protein_length,
+ five_prime_utr_length, three_prime_utr_length)
+ ;
+
+ CREATE UNIQUE INDEX TranscriptAttr_genenaf
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_na_feature_id, gene_source_id, source_id, project_id)
+ ;
+
+ CREATE INDEX TransAttr_locsIds_ix
+ ON :SCHEMA.TranscriptAttributes
+ (na_sequence_id, start_min, end_max, is_reversed, gene_source_id, source_id, project_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptCenDistance.psql b/Model/lib/psql/webready/orgSpecific/TranscriptCenDistance.psql
new file mode 100644
index 000000000..f1f905600
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptCenDistance.psql
@@ -0,0 +1,18 @@
+:CREATE_AND_POPULATE
+ SELECT DISTINCT
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ tl.feature_source_id AS transcript,
+ LEAST(ABS(mfl.start_min - tl.end_max),
+ ABS(mfl.end_max - tl.start_min)) AS centromere_distance,
+ tl.sequence_source_id AS genomic_sequence
+ FROM apidb.TranscriptLocation tl, apidb.FeatureLocation mfl,
+ dots.nasequence nas, sres.OntologyTerm so
+ WHERE tl.na_sequence_id = mfl.na_sequence_id
+ AND mfl.feature_type = 'Miscellaneous'
+ AND mfl.sequence_ontology_id = so.ontology_term_id
+ AND so.name = 'centromere'
+ AND tl.is_top_level = 1
+ and mfl.na_sequence_id = nas.na_sequence_id
+ and nas.taxon_id = :TAXON_ID
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptCenDistance_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptCenDistance_ix.psql
new file mode 100644
index 000000000..25e656ced
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptCenDistance_ix.psql
@@ -0,0 +1,4 @@
+ create index GCent_loc_ix
+ on :SCHEMA.TranscriptCenDistance (genomic_sequence, centromere_distance)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptEC.psql b/Model/lib/psql/webready/orgSpecific/TranscriptEC.psql
new file mode 100644
index 000000000..e979f9498
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptEC.psql
@@ -0,0 +1,35 @@
+
+DROP TABLE if exists :SCHEMA.:ORG_ABBREVTranscriptECTmp;
+/*
+
+ATTENTION: This table is empty. We will populate it in the comparative graph so that we can include ortho-derived EC numbers.
+We are creating them it so that they it is partitioned.
+*/
+
+/* This table maps EC numbers to transcripts */
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVTranscriptECTmp (
+ source_id VARCHAR(80),
+ gene_source_id VARCHAR(80),
+ enzyme_class_id NUMERIC(12),
+ ec_number VARCHAR(16),
+ ec_number_1 NUMERIC(3),
+ ec_number_2 NUMERIC(3),
+ ec_number_3 NUMERIC(3),
+ ec_number_4 NUMERIC(3),
+ wildcard_count NUMERIC(3),
+ evidence_code VARCHAR(255),
+
+ -- fields required for partitioning
+ project_id VARCHAR(20),
+ org_abbrev VARCHAR(20),
+ modification_date timestamp
+
+);
+
+
+:CREATE_AND_POPULATE
+select tet.* from :SCHEMA.:ORG_ABBREVTranscriptECTmp tet
+:DECLARE_PARTITION;
+
+DROP TABLE :SCHEMA.:ORG_ABBREVTranscriptECTmp;
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql
new file mode 100644
index 000000000..5b9767528
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql
@@ -0,0 +1,5 @@
+CREATE INDEX TranscriptEc_1_ix on :SCHEMA.TranscriptEC (source_id, ec_number)
+ ;
+
+CREATE INDEX TranscriptEc_2_ix on :SCHEMA.TranscriptEC (ec_number, source_id)
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql
new file mode 100644
index 000000000..09facffb8
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql
@@ -0,0 +1,23 @@
+
+DROP TABLE if exists :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp;
+/*
+
+ATTENTION: This table is empty. We will populate it in the comparative graph
+We are creating them it so that they it is partitioned.
+*/
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp (
+ source_id VARCHAR(80),
+ gene_id VARCHAR(80),
+ group_id VARCHAR(16),
+ project_id varchar(20),
+ org_abbrev varchar(20),
+ modification_date timestamp
+);
+
+:CREATE_AND_POPULATE
+SELECT ogt.* from :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp ogt
+:DECLARE_PARTITION;
+
+
+DROP TABLE :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp;
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql
new file mode 100644
index 000000000..ac9b3670e
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql
@@ -0,0 +1,2 @@
+alter table :SCHEMA.TranscriptOrthologGroup
+ add constraint TranscriptOrthologGroup_pk primary key (source_id);
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
new file mode 100644
index 000000000..f63449f73
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
@@ -0,0 +1,40 @@
+DROP TABLE if exists :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp;
+/*
+
+ATTENTION: This table is empty. We will populate it in the comparative graph so that we can include ortho-derived EC numbers.
+We are creating it here so that it is partitioned.
+*/
+
+/* This is the final TranscriptPathway table */
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp (
+ -- transcript information
+ source_id VARCHAR(80),
+ gene_source_id VARCHAR(80),
+
+ --pathway information
+ pathway_source_id VARCHAR(50),
+ pathway_name VARCHAR(150),
+ pathway_id NUMERIC(12),
+ pathway_source TEXT, -- TODO should this be a varchar?
+ external_database_release_id NUMERIC(10),
+
+ --match information
+ ec_number_gene VARCHAR(16),
+ wildcard_count_gene NUMERIC(3),
+ ec_number_pathway VARCHAR(16),
+ wildcard_count_pathway NUMERIC(3),
+ exact_match NUMERIC(3),
+ complete_ec NUMERIC(3),
+
+ -- fields required for partioning
+ project_id varchar(20),
+ org_abbrev varchar(20),
+ modification_date timestamp
+);
+
+:CREATE_AND_POPULATE
+SELECT tpt.* from :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp tpt
+:DECLARE_PARTITION;
+
+DROP TABLE :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp;
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptPathway_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway_ix.psql
new file mode 100644
index 000000000..92f77af43
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptPathway_ix.psql
@@ -0,0 +1,12 @@
+ create index TranscriptPath_ix
+ on :SCHEMA.TranscriptPathway
+ (gene_source_id, source_id, pathway_source_id,
+ pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway,
+ ec_number_pathway, pathway_source)
+ ;
+
+ create index TranscriptPathSource_ix
+ on :SCHEMA.TranscriptPathway (pathway_source,
+ gene_source_id, source_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptSequence.psql b/Model/lib/psql/webready/orgSpecific/TranscriptSequence.psql
new file mode 100644
index 000000000..edccc1d80
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptSequence.psql
@@ -0,0 +1,7 @@
+:CREATE_AND_POPULATE
+ SELECT ta.source_id, ta.project_id, ta.org_abbrev, sns.sequence, current_timestamp as modification_date
+ FROM :SCHEMA.TranscriptAttributes ta, dots.SplicedNaSequence sns
+ WHERE ta.source_id = sns.source_id
+ AND ta.org_abbrev = ':ORG_ABBREV'
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptSequence_ix.psql
new file mode 100644
index 000000000..98bb3137f
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptSequence_ix.psql
@@ -0,0 +1,3 @@
+ create index XScriptSeq_ix on :SCHEMA.TranscriptSequence (source_id, project_id)
+ ;
+
diff --git a/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql
new file mode 100644
index 000000000..0a6b669d8
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql
@@ -0,0 +1,26 @@
+:CREATE_AND_POPULATE
+ SELECT ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ , ta.source_id as transcript_source_id
+ , ta.gene_source_id AS gene_source_id
+ , tmf.topology AS tmf_topology
+ , aal.start_min AS tmf_start_min
+ , aal.end_max AS tmf_end_max
+ , substr(tas.sequence, aal.end_max::INTEGER - aal.start_min::INTEGER + 1, aal.start_min::INTEGER) AS tmf_sequence
+ , tmf.aa_feature_id tmf_aa_feature_id
+ , tmf.aa_sequence_id tmf_aa_sequence_id
+ , tas.source_id as protein_source_id
+ FROM dots.aalocation aal
+ , :SCHEMA.transcriptattributes ta
+ , dots.translatedaafeature taf
+ , dots.translatedaasequence tas
+ , dots.transmembraneaafeature tmf
+ WHERE ta.na_feature_id = taf.na_feature_id
+ AND taf.aa_sequence_id = tas.aa_sequence_id
+ AND tas.aa_sequence_id = tmf.aa_sequence_id
+ and tmf.aa_feature_id = aal.aa_feature_id
+ and ta.org_abbrev = ':ORG_ABBREV'
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webready/orgSpecific/TransmembraneDomains_ix.psql b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains_ix.psql
new file mode 100644
index 000000000..964f23421
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains_ix.psql
@@ -0,0 +1,4 @@
+ create index TransDom1_ix
+ on :SCHEMA.TransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology)
+ ;
+
diff --git a/Model/lib/psql/webready/unknown/AssociatedDataset.psql b/Model/lib/psql/webready/unknown/AssociatedDataset.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/unknown/DomainAssignment.psql b/Model/lib/psql/webready/unknown/DomainAssignment.psql
new file mode 100644
index 000000000..7cb927d97
--- /dev/null
+++ b/Model/lib/psql/webready/unknown/DomainAssignment.psql
@@ -0,0 +1,69 @@
+
+
+ create table DomainAssignment as
+ select sa.full_id, sa.group_name,
+ r.interpro_primary_id as accession,
+ r.interpro_desc as description,
+ CAST (NULL as NUMERIC) as domain_index,
+ sa.aa_sequence_id,
+ r.interpro_start_min as start_min,
+ r.interpro_end_min as end_max
+ from SequenceAttributes sa, apidb.interproresults r
+ where sa.full_id = r.protein_source_id
+ and upper(r.interpro_db_name) = 'PFAM'
+
+ ;
+
+
+
+ create index domain_accession_ix
+ on DomainAssignment (accession, full_id, group_name)
+
+ ;
+
+
+
+ create table domainIndex as
+ select row_number() OVER () as domain_index, accession
+ from (select distinct accession
+ from DomainAssignment
+ order by accession)
+
+ ;
+
+
+
+ create index domainIdxIdx on DomainIndex(accession, domain_index)
+
+ ;
+
+
+
+ update DomainAssignment da
+ set domain_index = (select domain_index
+ from DomainIndex
+ where accession = da.accession)
+
+ ;
+
+
+
+ create index domain_ix_ix
+ on DomainAssignment (domain_index, accession, full_id)
+
+ ;
+
+
+
+ create index domain_group_ix
+ on DomainAssignment (group_name, accession, full_id)
+
+ ;
+
+
+
+ create index domain_seq_ix
+ on DomainAssignment (aa_sequence_id, accession, full_id, group_name)
+
+ ;
+
diff --git a/Model/lib/psql/webready/unknown/EdaGeneGraph.psql b/Model/lib/psql/webready/unknown/EdaGeneGraph.psql
new file mode 100644
index 000000000..e69de29bb
diff --git a/Model/lib/psql/webready/unknown/PANResults.psql b/Model/lib/psql/webready/unknown/PANResults.psql
new file mode 100644
index 000000000..de67af481
--- /dev/null
+++ b/Model/lib/psql/webready/unknown/PANResults.psql
@@ -0,0 +1,91 @@
+
+
+ CREATE TABLE :ORG_ABBREVPANResults AS
+ SELECT DISTINCT r.*
+ FROM (
+ SELECT protocol_app_node_id as pan_id, 'Results::NAFeatureDiffResult' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.NAFeatureDiffResult)
+ UNION
+ SELECT protocol_app_node_id, 'Results::ReporterIntensity' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.ReporterIntensity)
+ UNION
+ SELECT protocol_app_node_id, 'Results::SegmentResult' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.SegmentResult)
+ UNION
+ SELECT protocol_app_node_id, 'Results::CompoundMassSpec' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.CompoundMassSpec)
+ UNION
+ SELECT protocol_app_node_id, 'Results::NaFeatureHostResponse' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.NaFeatureHostResponse)
+ UNION
+ SELECT protocol_app_node_id, 'ApiDB::ChrCopyNumber' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM ApiDB.ChrCopyNumber)
+ UNION
+ SELECT protocol_app_node_id, 'ApiDB::GeneCopyNumber' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM ApiDB.GeneCopyNumber)
+ UNION
+ SELECT protocol_app_node_id, 'Results::NAFeatureExpression' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.NAFeatureExpression)
+ UNION
+ SELECT protocol_app_node_id, 'Results::EditingEvent' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.EditingEvent)
+ UNION
+ SELECT protocol_app_node_id, 'Results::FamilyDiffResult' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.FamilyDiffResult)
+ UNION
+ SELECT protocol_app_node_id, 'Results::FamilyExpression' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.FamilyExpression)
+ UNION
+ SELECT protocol_app_node_id, 'Results::GeneDiffResult' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.GeneDiffResult)
+ UNION
+ SELECT protocol_app_node_id, 'Results::GeneExpression' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.GeneExpression)
+ UNION
+ SELECT protocol_app_node_id, 'Results::GeneSimilarity' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.GeneSimilarity)
+ UNION
+ SELECT protocol_app_node_id, 'Results::ReporterDiffResult' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.ReporterDiffResult)
+ UNION
+ SELECT protocol_app_node_id, 'Results::ReporterExpression' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.ReporterExpression)
+ UNION
+ SELECT protocol_app_node_id, 'Results::RnaDiffResult' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.RnaDiffResult)
+ UNION
+ SELECT protocol_app_node_id, 'Results::RnaExpression' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.RnaExpression)
+ UNION
+ SELECT protocol_app_node_id, 'Results::LineageAbundance' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.LineageAbundance)
+ UNION
+ SELECT protocol_app_node_id, 'Results::SegmentDiffResult' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.SegmentDiffResult)
+ UNION
+ SELECT protocol_app_node_id, 'Results::SeqVariation' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM Results.SeqVariation)
+ UNION
+ SELECT protocol_app_node_id, 'ApiDB::SequenceVariation' as result_table FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM study.ProtocolAppNode WHERE name like '% (Sequence Variation)')
+ UNION
+ SELECT protocol_app_node_id, 'ApiDB::MassSpecSummary' as result_table from study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM apidb.MASSSPECSUMMARY)
+ UNION
+ SELECT protocol_app_node_id, 'ApiDB::IntronJunction' as result_table from study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM apidb.IntronJunction)
+ UNION
+ SELECT protocol_app_node_id, 'ApiDB::RflpGenotype' as result_table from study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM apidb.RflpGenotype)
+ UNION
+ SELECT protocol_app_node_id, 'ApiDB::RflpGenotypeNumber' as result_table from study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM apidb.RflpGenotypeNumber)
+ UNION
+ SELECT protocol_app_node_id, 'ApiDB::CrisprPhenotype' as result_table from study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM apidb.crisprphenotype)
+ ) r, :ORG_ABBREVpanextdbrls panExtDbRls
+ WHERE r.pan_id = panExtDbRls.pan_id
+ AND (panExtDbRls.dataset_name = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0)
+
+ ;
+
diff --git a/Model/lib/psql/webready/unknown/ProjectTaxon.psql b/Model/lib/psql/webready/unknown/ProjectTaxon.psql
new file mode 100644
index 000000000..dd3d4ee56
--- /dev/null
+++ b/Model/lib/psql/webready/unknown/ProjectTaxon.psql
@@ -0,0 +1,151 @@
+
+
+ CREATE TABLE :ORG_ABBREVProjectTaxon AS
+ WITH
+ local_taxon -- a taxon found in this instance, either in dots.ExternalNaSequence or in apidb.Organism
+ AS ( SELECT distinct tn.name as taxon,
+ substr((tn.name), 1, position(' ' IN tn.name||' ') - 1) as first_word,
+ pi.name as project_id
+ FROM dots.ExternalNaSequence ens, sres.TaxonName tn, core.ProjectInfo pi
+ WHERE ens.taxon_id = tn.taxon_id
+ and ((tn.name not like 'Bodo %' and tn.name not like 'Drosophila %')
+ OR tn.name_class = 'scientific name')
+ and ens.row_project_id = pi.project_id
+ -- get names from apidb.Organism.family_name_for_files
+ -- (may not be necessary)
+ UNION
+ SELECT family_name_for_files as taxon,
+ substr((family_name_for_files), 1, position(' ' IN family_name_for_files||' ') - 1) as first_word,
+ project_name as project_id
+ FROM apidb.Organism
+ WHERE family_name_for_files in (select name from sres.TaxonName)
+ ),
+ mononym -- a taxon name that's the first word of a local taxon
+ AS (SELECT distinct lower(lt.first_word) as taxon, lt.project_id
+ FROM local_taxon lt, sres.TaxonName tn
+ WHERE lt.first_word = tn.name
+ -- and tn.name_class = 'scientific name'
+ ),
+ full_name -- the full name of a local taxon whose first name is not a taxon
+ AS (SELECT distinct lower(lt.taxon) as taxon, lt.project_id
+ FROM local_taxon lt, sres.TaxonName tn
+ WHERE lt.taxon = tn.name
+ -- and tn.name_class = 'scientific name'
+ and lower(lt.first_word) not in (select taxon from mononym))
+ SELECT * FROM mononym
+ UNION
+ SELECT * FROM full_name
+ UNION
+ SELECT 'hypocrea', 'FungiDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/afsm11', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/afsm2', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/amopi', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/asl1', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/de11d', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/de4a', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/frs/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gd-d1-1', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gd-d1-2', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gd-d1-3', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gillnor1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gillnor2/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gillrich3/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/lithon', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/net12afl/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/netc1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/netc2/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/neth2t3/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/np251002/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/nrss/ii', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/pal2', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/pao27/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/rp', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sed5a/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sedc1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sedcb1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sedct1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sedmh1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sedst1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sm53', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sm68', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/st4n', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/st8v/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/su03', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/su4', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/tg1162', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/tg1267', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/tun1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/wt2708/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/wtuts/i', 'TriTrypDB'
+ UNION
+ SELECT 'plo_of_paramoeba_invadens_ags-2013', 'TriTrypDB'
+ UNION
+ SELECT 'soil_flagellate_and31', 'TriTrypDB'
+ UNION
+ SELECT 'kinetoplastid_flagellate_lfs2', 'TriTrypDB'
+ UNION
+ SELECT 'cryptaulaxoides-like_sp._tcs-2003', 'TriTrypDB'
+ ORDER BY 2, 1
+
+ ;
+
+
+
+ update :ORG_ABBREVProjectTaxon
+ set taxon = replace(taxon, '''', '')
+ where taxon like '%''%'
+
+ ;
+
+
+
+ create unique index data_load_prjct_err on :ORG_ABBREVProjectTaxon (taxon)
+
+ ;
+
+
+
+ create unique index projtax_ix on :ORG_ABBREVProjectTaxon (taxon, project_id)
+
+ ;
+
diff --git a/Model/lib/psql/webready/unknown/SequenceEnzymeClass.psql b/Model/lib/psql/webready/unknown/SequenceEnzymeClass.psql
new file mode 100644
index 000000000..f948ba46c
--- /dev/null
+++ b/Model/lib/psql/webready/unknown/SequenceEnzymeClass.psql
@@ -0,0 +1,25 @@
+:CREATE_AND_POPULATE
+ SELECT sa.project_id
+ , sa.org_abbrev
+ , current_timestamp as modification_date
+ , sa.full_id
+ , sa.group_name
+ -- , sec.uniprot_accession
+ , ec.ec_number
+ , ec.description
+ , ec.parent_id as ec_parent
+ , ec.ec_number_1
+ , ec.ec_number_2
+ , ec.ec_number_3
+ , ec.ec_number_4
+ FROM :SCHEMA.sequenceattributes sa
+ , dots.AASequence aa
+ , dots.AASequenceEnzymeClass sec
+ , sres.ENZYMECLASS ec
+ WHERE sa.aa_sequence_id = aa.aa_sequence_id
+ AND sec.aa_sequence_id = aa.aa_sequence_id
+ AND sec.enzyme_class_id = ec.enzyme_class_id
+ AND sa.org_abbrev = ':ORG_ABBREV'
+:DECLARE_PARTITION
+;
+
diff --git a/Model/lib/psql/webready/unknown/SequenceEnzymeClass_ix.psql b/Model/lib/psql/webready/unknown/SequenceEnzymeClass_ix.psql
new file mode 100644
index 000000000..255767b47
--- /dev/null
+++ b/Model/lib/psql/webready/unknown/SequenceEnzymeClass_ix.psql
@@ -0,0 +1,7 @@
+
+ CREATE INDEX SequenceEnzymeClass_idx1 ON :SCHEMA.SequenceEnzymeClass (group_name, ec_number, description)
+ ;
+
+ CREATE INDEX SequenceEnzymeClass_idx2 ON :SCHEMA.SequenceEnzymeClass (full_id, ec_number, description)
+ ;
+
diff --git a/Model/lib/psql/webready/unknown/TypeAheadCounts.psql b/Model/lib/psql/webready/unknown/TypeAheadCounts.psql
new file mode 100644
index 000000000..921efa9bd
--- /dev/null
+++ b/Model/lib/psql/webready/unknown/TypeAheadCounts.psql
@@ -0,0 +1,42 @@
+
+
+CREATE TABLE TypeAheadCounts AS
+ (SELECT DISTINCT dr.primary_identifier as option_id,
+ count(distinct aaf.aa_sequence_id) AS protein_count
+ FROM dots.DbRefAaFeature draf, sres.DbRef dr, dots.aafeature aaf,
+ sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+ WHERE draf.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id
+ = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND aaf.aa_feature_id = draf.aa_feature_id
+ GROUP BY dr.primary_identifier
+ )
+ UNION
+ (SELECT DISTINCT ec.ec_number AS option_id,
+ COUNT(DISTINCT asec.aa_sequence_id) AS protein_count
+ FROM sres.enzymeClass ec,
+ dots.aaSequenceEnzymeClass asec
+ WHERE asec.enzyme_class_id = ec.enzyme_class_id
+ GROUP BY ec.ec_number)
+ UNION
+ (SELECT DISTINCT gt.source_id AS option_id,
+ COUNT(DISTINCT aaf.aa_sequence_id) AS protein_count
+ FROM dots.AaFeature aaf,
+ dots.GoAssociation ga, sres.OntologyTerm gt,
+ dots.GoAssociationInstance gai,core.TableInfo ti
+ WHERE aaf.aa_sequence_id = ga.row_id
+ AND ga.table_id = ti.table_id
+ AND ti.name = 'ExternalAASequence'
+ AND ga.go_term_id = gt.ontology_term_id
+ AND ga.go_association_id = gai.go_association_id
+ GROUP BY gt.source_id)
+
+ ;
+
+
+
+CREATE INDEX TypeAheadCounts_idx ON TypeAheadCounts (option_id)
+
+ ;
+
diff --git a/Model/lib/wdk/model/questions/params/geneParams.xml b/Model/lib/wdk/model/questions/params/geneParams.xml
index 142a6d1b6..4635e8221 100644
--- a/Model/lib/wdk/model/questions/params/geneParams.xml
+++ b/Model/lib/wdk/model/questions/params/geneParams.xml
@@ -8274,16 +8274,16 @@ products of your selected type (or types).
Samples:
' + || sample_table || chr(10) || + ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation + FROM ( + SELECT ds.name as name, ds.summary as description, pubs.contact_email as email, + pubs.pmids as publications, samples.sample_table as sample_table + FROM DatasetPresenter ds, pubs, samples + WHERE ds.dataset_presenter_id = pubs.id + AND ds.dataset_presenter_id = samples.id + ) t + ]]> + + + + + + +