From 55e982a133a3eec08cc79878b8b0c0a7ca2c3662 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 9 May 2025 12:51:53 -0400
Subject: [PATCH 001/112] add initial web table psql files
---
.../lib/psql/webtables/??/AlphaFoldGenes.psql | 103 +++
.../psql/webtables/??/AssociatedDataset.psql | 0
.../lib/psql/webtables/??/DatasetDetail.psql | 58 ++
.../psql/webtables/??/DatasetPresenter.psql | 0
.../psql/webtables/??/DomainAssignment.psql | 69 ++
Model/lib/psql/webtables/??/EdaGeneGraph.psql | 0
.../psql/webtables/??/EupathBuildDates.psql | 0
.../??/ExternalDbDatasetPresenter.psql | 46 +
.../??/ExternalSequenceTaxonRank.psql | 60 ++
.../psql/webtables/??/GeneGroupProfile.psql | 30 +
.../webtables/??/GenomicSeqAttributes.psql | 102 +++
.../??/GroupPhylogeneticProfile.psql | 20 +
.../webtables/??/OrthologousTranscripts.psql | 73 ++
Model/lib/psql/webtables/??/PANExtDbRls.psql | 36 +
Model/lib/psql/webtables/??/PANIO.psql | 59 ++
Model/lib/psql/webtables/??/PANResults.psql | 91 ++
.../psql/webtables/??/PhyleticPattern.psql | 395 +++++++++
Model/lib/psql/webtables/??/ProjectTaxon.psql | 151 ++++
.../psql/webtables/??/SequenceAttributes.psql | 131 +++
.../webtables/??/SequenceEnzymeClass.psql | 36 +
.../webtables/??/SequencePieceClosure.psql | 48 ++
.../psql/webtables/??/StudyIdDatasetId.psql | 24 +
.../psql/webtables/??/TypeAheadCounts.psql | 42 +
.../psql/webtables/MG/CompoundAttributes.psql | 28 +
Model/lib/psql/webtables/MG/CompoundId.psql | 39 +
.../psql/webtables/MG/CompoundProperties.psql | 29 +
.../psql/webtables/MG/CompoundTypeAheads.psql | 15 +
.../webtables/MG/GroupDomainAttribute.psql | 32 +
.../lib/psql/webtables/MG/OntologyLevels.psql | 48 ++
.../psql/webtables/MG/PathwayAttributes.psql | 68 ++
.../psql/webtables/MG/PathwayCompounds.psql | 91 ++
Model/lib/psql/webtables/MG/PathwayNodes.psql | 306 +++++++
.../psql/webtables/MG/PathwayReactions.psql | 141 +++
.../psql/webtables/MO/ChIPchipTranscript.psql | 44 +
.../webtables/MO/ChIPchipTranscript_ix.psql | 7 +
.../lib/psql/webtables/MO/ChrCopyNumbers.psql | 19 +
.../psql/webtables/MO/ChrCopyNumbers_ix.psql | 16 +
.../lib/psql/webtables/MO/CodingSequence.psql | 14 +
.../psql/webtables/MO/CodingSequence_ix.psql | 7 +
.../webtables/MO/DatasetExampleSourceId.psql | 23 +
.../MO/DatasetExampleSourceId_ix.psql | 0
Model/lib/psql/webtables/MO/EqtlSpan.psql | 25 +
Model/lib/psql/webtables/MO/EqtlSpan_ix.psql | 8 +
.../webtables/MO/EstAlignmentGeneSummary.psql | 68 ++
.../MO/EstAlignmentGeneSummary_ix.psql | 20 +
.../lib/psql/webtables/MO/EstAttributes.psql | 51 ++
.../psql/webtables/MO/EstAttributes_ix.psql | 7 +
Model/lib/psql/webtables/MO/EstSequence.psql | 16 +
.../lib/psql/webtables/MO/EstSequence_ix.psql | 7 +
.../lib/psql/webtables/MO/GeneAttributes.psql | 113 +++
.../psql/webtables/MO/GeneAttributes_ix.psql | 90 ++
.../psql/webtables/MO/GeneCopyNumbers.psql | 29 +
.../psql/webtables/MO/GeneCopyNumbers_ix.psql | 8 +
Model/lib/psql/webtables/MO/GeneGoTable.psql | 26 +
.../lib/psql/webtables/MO/GeneGoTable_ix.psql | 9 +
Model/lib/psql/webtables/MO/GeneGoTerms.psql | 39 +
.../lib/psql/webtables/MO/GeneGoTerms_ix.psql | 10 +
Model/lib/psql/webtables/MO/GeneId.psql | 265 ++++++
Model/lib/psql/webtables/MO/GeneId_ix.psql | 35 +
.../psql/webtables/MO/GeneIntJuncStats.psql | 25 +
.../webtables/MO/GeneIntJuncStats_ix.psql | 6 +
.../psql/webtables/MO/GeneIntronJunction.psql | 223 +++++
.../webtables/MO/GeneIntronJunction_ix.psql | 21 +
.../lib/psql/webtables/MO/GeneLocations.psql | 22 +
.../psql/webtables/MO/GeneLocations_ix.psql | 8 +
.../psql/webtables/MO/GeneMaxIntronGIJ.psql | 47 +
.../webtables/MO/GeneMaxIntronGIJ_ix.psql | 6 +
.../lib/psql/webtables/MO/GeneModelDump.psql | 32 +
.../psql/webtables/MO/GeneModelDump_ix.psql | 9 +
.../psql/webtables/MO/GeneSummaryFilter.psql | 14 +
.../webtables/MO/GeneSummaryFilter_ix.psql | 0
.../psql/webtables/MO/GenomicSequenceId.psql | 26 +
.../webtables/MO/GenomicSequenceId_ix.psql | 21 +
.../webtables/MO/GenomicSequenceSequence.psql | 14 +
.../MO/GenomicSequenceSequence_ix.psql | 7 +
.../lib/psql/webtables/MO/GoTermSummary.psql | 56 ++
.../psql/webtables/MO/GoTermSummary_ix.psql | 16 +
.../psql/webtables/MO/IntronSupportLevel.psql | 119 +++
.../webtables/MO/IntronSupportLevel_ix.psql | 0
.../psql/webtables/MO/IntronUtrCoords.psql | 35 +
.../psql/webtables/MO/IntronUtrCoords_ix.psql | 16 +
.../lib/psql/webtables/MO/NameMappingGIJ.psql | 125 +++
.../psql/webtables/MO/NameMappingGIJ_ix.psql | 6 +
.../webtables/MO/OrganismAbbreviation.psql | 15 +
.../MO/OrganismAbbreviationBlast.psql | 42 +
.../MO/OrganismAbbreviationBlast_ix.psql | 0
.../webtables/MO/OrganismAbbreviation_ix.psql | 0
.../psql/webtables/MO/OrganismAttributes.psql | 327 +++++++
.../webtables/MO/OrganismAttributes_ix.psql | 7 +
.../webtables/MO/OrganismSelectTaxonRank.psql | 49 ++
.../MO/OrganismSelectTaxonRank_ix.psql | 0
.../psql/webtables/MO/PathwayNodeGene.psql | 14 +
.../psql/webtables/MO/PathwayNodeGene_ix.psql | 0
.../psql/webtables/MO/PathwaysGeneTable.psql | 44 +
.../webtables/MO/PathwaysGeneTable_ix.psql | 9 +
.../lib/psql/webtables/MO/PdbSimilarity.psql | 32 +
.../psql/webtables/MO/PdbSimilarity_ix.psql | 0
Model/lib/psql/webtables/MO/Profile.psql | 800 ++++++++++++++++++
.../lib/psql/webtables/MO/ProfileSamples.psql | 167 ++++
.../psql/webtables/MO/ProfileSamples_ix.psql | 22 +
Model/lib/psql/webtables/MO/ProfileType.psql | 13 +
.../lib/psql/webtables/MO/ProfileType_ix.psql | 0
Model/lib/psql/webtables/MO/Profile_ix.psql | 24 +
.../psql/webtables/MO/ProteinAttributes.psql | 207 +++++
.../webtables/MO/ProteinAttributes_ix.psql | 14 +
.../psql/webtables/MO/ProteinSequence.psql | 15 +
.../psql/webtables/MO/ProteinSequence_ix.psql | 7 +
Model/lib/psql/webtables/MO/RnaSeqStats.psql | 55 ++
.../lib/psql/webtables/MO/RnaSeqStats_ix.psql | 0
.../webtables/MO/SignalPeptideDomains.psql | 41 +
.../webtables/MO/SignalPeptideDomains_ix.psql | 16 +
Model/lib/psql/webtables/MO/TFBSGene.psql | 46 +
Model/lib/psql/webtables/MO/TFBSGene_ix.psql | 14 +
Model/lib/psql/webtables/MO/TaxonSpecies.psql | 35 +
.../psql/webtables/MO/TaxonSpecies_ix.psql | 0
Model/lib/psql/webtables/MO/Taxonomy.psql | 28 +
Model/lib/psql/webtables/MO/Taxonomy_ix.psql | 9 +
.../webtables/MO/TranscriptAttributes.psql | 440 ++++++++++
.../webtables/MO/TranscriptAttributes_ix.psql | 135 +++
.../webtables/MO/TranscriptCenDistance.psql | 19 +
.../MO/TranscriptCenDistance_ix.psql | 8 +
.../psql/webtables/MO/TranscriptPathway.psql | 112 +++
.../webtables/MO/TranscriptPathway_ix.psql | 18 +
.../psql/webtables/MO/TranscriptSequence.psql | 11 +
.../webtables/MO/TranscriptSequence_ix.psql | 7 +
.../webtables/MO/TransmembraneDomains.psql | 27 +
.../webtables/MO/TransmembraneDomains_ix.psql | 8 +
127 files changed, 7018 insertions(+)
create mode 100644 Model/lib/psql/webtables/??/AlphaFoldGenes.psql
create mode 100644 Model/lib/psql/webtables/??/AssociatedDataset.psql
create mode 100644 Model/lib/psql/webtables/??/DatasetDetail.psql
create mode 100644 Model/lib/psql/webtables/??/DatasetPresenter.psql
create mode 100644 Model/lib/psql/webtables/??/DomainAssignment.psql
create mode 100644 Model/lib/psql/webtables/??/EdaGeneGraph.psql
create mode 100644 Model/lib/psql/webtables/??/EupathBuildDates.psql
create mode 100644 Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql
create mode 100644 Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql
create mode 100644 Model/lib/psql/webtables/??/GeneGroupProfile.psql
create mode 100644 Model/lib/psql/webtables/??/GenomicSeqAttributes.psql
create mode 100644 Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql
create mode 100644 Model/lib/psql/webtables/??/OrthologousTranscripts.psql
create mode 100644 Model/lib/psql/webtables/??/PANExtDbRls.psql
create mode 100644 Model/lib/psql/webtables/??/PANIO.psql
create mode 100644 Model/lib/psql/webtables/??/PANResults.psql
create mode 100644 Model/lib/psql/webtables/??/PhyleticPattern.psql
create mode 100644 Model/lib/psql/webtables/??/ProjectTaxon.psql
create mode 100644 Model/lib/psql/webtables/??/SequenceAttributes.psql
create mode 100644 Model/lib/psql/webtables/??/SequenceEnzymeClass.psql
create mode 100644 Model/lib/psql/webtables/??/SequencePieceClosure.psql
create mode 100644 Model/lib/psql/webtables/??/StudyIdDatasetId.psql
create mode 100644 Model/lib/psql/webtables/??/TypeAheadCounts.psql
create mode 100644 Model/lib/psql/webtables/MG/CompoundAttributes.psql
create mode 100644 Model/lib/psql/webtables/MG/CompoundId.psql
create mode 100644 Model/lib/psql/webtables/MG/CompoundProperties.psql
create mode 100644 Model/lib/psql/webtables/MG/CompoundTypeAheads.psql
create mode 100644 Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
create mode 100644 Model/lib/psql/webtables/MG/OntologyLevels.psql
create mode 100644 Model/lib/psql/webtables/MG/PathwayAttributes.psql
create mode 100644 Model/lib/psql/webtables/MG/PathwayCompounds.psql
create mode 100644 Model/lib/psql/webtables/MG/PathwayNodes.psql
create mode 100644 Model/lib/psql/webtables/MG/PathwayReactions.psql
create mode 100644 Model/lib/psql/webtables/MO/ChIPchipTranscript.psql
create mode 100644 Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
create mode 100644 Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/CodingSequence.psql
create mode 100644 Model/lib/psql/webtables/MO/CodingSequence_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql
create mode 100644 Model/lib/psql/webtables/MO/DatasetExampleSourceId_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/EqtlSpan.psql
create mode 100644 Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
create mode 100644 Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/EstAttributes.psql
create mode 100644 Model/lib/psql/webtables/MO/EstAttributes_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/EstSequence.psql
create mode 100644 Model/lib/psql/webtables/MO/EstSequence_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneAttributes.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneCopyNumbers.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneGoTable.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneGoTerms.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneId.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneId_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneIntJuncStats.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneIntronJunction.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneLocations.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneLocations_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneModelDump.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneSummaryFilter.psql
create mode 100644 Model/lib/psql/webtables/MO/GeneSummaryFilter_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GenomicSequenceId.psql
create mode 100644 Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
create mode 100644 Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/GoTermSummary.psql
create mode 100644 Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/IntronSupportLevel.psql
create mode 100644 Model/lib/psql/webtables/MO/IntronSupportLevel_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/IntronUtrCoords.psql
create mode 100644 Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/NameMappingGIJ.psql
create mode 100644 Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
create mode 100644 Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
create mode 100644 Model/lib/psql/webtables/MO/OrganismAbbreviationBlast_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/OrganismAbbreviation_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/OrganismAttributes.psql
create mode 100644 Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
create mode 100644 Model/lib/psql/webtables/MO/OrganismSelectTaxonRank_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/PathwayNodeGene.psql
create mode 100644 Model/lib/psql/webtables/MO/PathwayNodeGene_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/PathwaysGeneTable.psql
create mode 100644 Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/PdbSimilarity.psql
create mode 100644 Model/lib/psql/webtables/MO/PdbSimilarity_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/Profile.psql
create mode 100644 Model/lib/psql/webtables/MO/ProfileSamples.psql
create mode 100644 Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/ProfileType.psql
create mode 100644 Model/lib/psql/webtables/MO/ProfileType_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/Profile_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/ProteinAttributes.psql
create mode 100644 Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/ProteinSequence.psql
create mode 100644 Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/RnaSeqStats.psql
create mode 100644 Model/lib/psql/webtables/MO/RnaSeqStats_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/SignalPeptideDomains.psql
create mode 100644 Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/TFBSGene.psql
create mode 100644 Model/lib/psql/webtables/MO/TFBSGene_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/TaxonSpecies.psql
create mode 100644 Model/lib/psql/webtables/MO/TaxonSpecies_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/Taxonomy.psql
create mode 100644 Model/lib/psql/webtables/MO/Taxonomy_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/TranscriptAttributes.psql
create mode 100644 Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/TranscriptCenDistance.psql
create mode 100644 Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/TranscriptPathway.psql
create mode 100644 Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/TranscriptSequence.psql
create mode 100644 Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/TransmembraneDomains.psql
create mode 100644 Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
diff --git a/Model/lib/psql/webtables/??/AlphaFoldGenes.psql b/Model/lib/psql/webtables/??/AlphaFoldGenes.psql
new file mode 100644
index 0000000000..164c150fcb
--- /dev/null
+++ b/Model/lib/psql/webtables/??/AlphaFoldGenes.psql
@@ -0,0 +1,103 @@
+ -- uniprotGenes: UniProt cross-references (sres.dbref rows) paired with gene ids, each
+ -- carrying a preference rank (minRank keeps the lowest) and an AlphaFold hit length.
+ CREATE TABLE uniprotGenes AS
+ SELECT DISTINCT ed.name
+ , d.*
+ , edr.version
+ , aa.source_id
+ , pa.gene_source_id
+ , CASE WHEN (ed.name like '%SWISSPROT%' AND edr.version = 'xrefuniparc') THEN 1
+ WHEN (ed.name like '%SPTREMBL%' AND edr.version = 'xrefuniparc') THEN 2
+ WHEN (ed.name like '%SWISSPROT%' AND edr.version = 'xref_sprot_blastp') THEN 4
+ WHEN (ed.name like '%SPTREMBL%' and edr.version = 'xref_trembl_blastp') THEN 5
+ ELSE 6 END as rank -- rank 3 is assigned by the second UNION branch
+ , (af.last_residue_index - af.first_residue_index + 1) as hit_length -- NULL when no AlphaFold row matches
+ FROM sres.dbref d
+ LEFT JOIN apidb.AlphaFold af ON d.primary_identifier = af.uniprot_id
+ , sres.externaldatabase ed
+ , sres.externaldatabaserelease edr
+ , dots.dbrefaafeature db
+ , dots.aafeature aa
+ , ProteinAttributes pa
+ WHERE (ed.name = 'Uniprot/SWISSPROT' OR ed.name = 'Uniprot/SPTREMBL')
+ AND (edr.version = 'xrefuniparc' OR edr.version = 'xref_sprot_blastp' OR edr.version = 'xref_trembl_blastp')
+ AND edr.external_database_id = ed.external_database_id
+ AND d.external_database_release_id = edr.external_database_release_id
+ AND db.db_ref_id = d.db_ref_id
+ AND aa.aa_feature_id = db.aa_feature_id
+ AND pa.source_id = aa.source_id
+ UNION -- second branch: cross-references linked through NA features instead of AA features
+ SELECT DISTINCT ed.name
+ , d.*
+ , edr.version
+ , na.source_id
+ , ta.gene_source_id
+ , 3 as rank
+ , (af.last_residue_index - af.first_residue_index + 1) as hit_length
+ FROM sres.dbref d
+ LEFT JOIN apidb.AlphaFold af ON d.primary_identifier = af.uniprot_id
+ , sres.externaldatabase ed
+ , sres.externaldatabaserelease edr
+ , dots.dbrefnafeature db
+ , dots.nafeature na
+ , TranscriptAttributes ta
+ WHERE ed.name like '%_dbxref_%niprot_%RSRC'
+ AND edr.external_database_id = ed.external_database_id
+ AND d.external_database_release_id = edr.external_database_release_id
+ AND db.db_ref_id = d.db_ref_id
+ AND na.na_feature_id = db.na_feature_id
+ AND (ta.transcript_source_id = na.source_id OR ta.gene_source_id = na.source_id) -- the NA feature may be a transcript or a gene
+
+ ;
+
+ -- minRank: for every gene, the numerically smallest rank found among the
+ -- uniprotGenes rows that have a non-NULL hit_length.
+ CREATE UNLOGGED TABLE minRank AS
+ SELECT ranked.gene_source_id
+ , min(ranked.rank) AS min_rank
+ FROM uniprotGenes ranked
+ WHERE ranked.hit_length IS NOT NULL
+ GROUP BY ranked.gene_source_id
+
+
+ ;
+ -- alphaFoldHits: one UniProt accession per gene -- among the gene's rows at its best
+ -- rank (minRank), the accession with the largest hit_length.
+ -- Ties on hit_length are broken by accession so the choice is repeatable.
+ CREATE UNLOGGED TABLE alphaFoldHits AS (
+ SELECT DISTINCT gene_source_id
+ , last_value(primary_identifier) over (PARTITION BY gene_source_id ORDER BY hit_length ASC, primary_identifier ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS uniprot_id
+ FROM (
+ SELECT upg.*
+ FROM uniprotGenes upg
+ , minRank
+ WHERE upg.gene_source_id = minRank.gene_source_id
+ AND upg.rank = minRank.min_rank
+ AND upg.hit_length IS NOT NULL -- NULLs sort last under ASC and would otherwise win last_value()
+ ) t
+ )
+ ;
+
+ -- AlphaFoldGenes: the apidb.alphafold row(s) for the UniProt accession that
+ -- alphaFoldHits selected for each gene.
+ CREATE TABLE AlphaFoldGenes AS
+ SELECT hit.gene_source_id
+ , model.uniprot_id
+ , model.source_id AS alphafold_id
+ , model.alphafold_version
+ , model.first_residue_index
+ , model.last_residue_index
+ FROM alphaFoldHits hit
+ INNER JOIN apidb.alphafold model
+ ON model.uniprot_id = hit.uniprot_id
+
+
+ ;
+
+ -- Lookup index on AlphaFoldGenes keyed by gene, then UniProt accession.
+ CREATE INDEX AlphaFoldGenes_idx
+ ON AlphaFoldGenes (gene_source_id, uniprot_id)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/AssociatedDataset.psql b/Model/lib/psql/webtables/??/AssociatedDataset.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/??/DatasetDetail.psql b/Model/lib/psql/webtables/??/DatasetDetail.psql
new file mode 100644
index 0000000000..1ec5eed7be
--- /dev/null
+++ b/Model/lib/psql/webtables/??/DatasetDetail.psql
@@ -0,0 +1,58 @@
+ -- DatasetDetail: one search_string per dataset presenter row, concatenating its descriptive
+ -- fields, its primary contact, and its space-joined publication ids and citations.
+ CREATE TABLE DatasetDetail AS
+ SELECT dataset_presenter_id,
+ -- concat_ws() skips NULL arguments; a plain || chain makes the whole string NULL in
+ -- PostgreSQL as soon as any single field is NULL, leaving the dataset unsearchable.
+ concat_ws(' ', name, category, usage, caveat, acknowledgement, type, subtype,
+ summary, description, contact, institution, pubmed_id, citation) as search_string
+ FROM (
+ SELECT
+ sub.dataset_presenter_id as dataset_presenter_id,
+ sub.name as name,
+ sub.category as category,
+ sub.usage as usage,
+ sub.caveat as caveat,
+ sub.acknowledgement as acknowledgement,
+ sub.type as type,
+ sub.subtype as subtype,
+ sub.contact,
+ sub.institution,
+ sub.pubmed_id,
+ sub.citation,
+ dp.summary,
+ dp.description
+ FROM DatasetPresenter dp,
+ (
+ SELECT DISTINCT
+ dp.dataset_presenter_id as dataset_presenter_id,
+ dp.display_name as name,
+ dp.display_category as category,
+ dp.usage as usage,
+ dp.caveat as caveat,
+ dp.acknowledgement as acknowledgement,
+ dp.type as type,
+ dp.subtype as subtype,
+ dc.name as contact,
+ dc.affiliation as institution,
+ string_agg(dpub.pmid, ' ' ORDER BY dpub.pmid) as pubmed_id,
+ -- CHECK AND FIX - regexp_like ISSUE
+ --string_agg(CASE WHEN REGEXP_LIKE(dpub.citation, '[[:digit:]]{4};')
+ -- THEN substr(citation, 1, regexp_instr(citation, '[[:digit:]]{4};' ) - 1)
+ -- ELSE dpub.citation
+ -- END , ' ' ORDER BY dpub.citation) as citation
+ string_agg(dpub.citation, ' ' ORDER BY dpub.citation) as citation
+ FROM DatasetPresenter dp, DatasetContact dc,
+ DatasetPublication dpub
+ WHERE dp.dataset_presenter_id = dc.dataset_presenter_id
+ AND dp.dataset_presenter_id = dpub.dataset_presenter_id
+ AND dc.is_primary_contact = true
+ GROUP by dp.dataset_presenter_id, dp.display_name,dp.display_category,
+ dp.usage,dp.caveat,dp.acknowledgement,dp.type,dp.subtype,dc.name,
+ dc.affiliation
+ ) sub
+ WHERE dp.dataset_presenter_id = sub.dataset_presenter_id
+ ) t
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/DatasetPresenter.psql b/Model/lib/psql/webtables/??/DatasetPresenter.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/??/DomainAssignment.psql b/Model/lib/psql/webtables/??/DomainAssignment.psql
new file mode 100644
index 0000000000..7cb927d97e
--- /dev/null
+++ b/Model/lib/psql/webtables/??/DomainAssignment.psql
@@ -0,0 +1,69 @@
+ -- DomainAssignment: PFAM rows of apidb.interproresults matched to SequenceAttributes by
+ -- protein id. domain_index is created as a NULL numeric placeholder column.
+ CREATE TABLE DomainAssignment AS
+ SELECT seq.full_id
+ , seq.group_name
+ , ipr.interpro_primary_id AS accession
+ , ipr.interpro_desc AS description
+ , CAST(NULL AS numeric) AS domain_index
+ , seq.aa_sequence_id
+ , ipr.interpro_start_min AS start_min
+ , ipr.interpro_end_min AS end_max
+ FROM SequenceAttributes seq
+ INNER JOIN apidb.interproresults ipr ON ipr.protein_source_id = seq.full_id
+ WHERE upper(ipr.interpro_db_name) = 'PFAM'
+ ;
+
+
+ -- Index on DomainAssignment led by accession.
+ CREATE INDEX domain_accession_ix ON DomainAssignment (accession, full_id, group_name)
+
+
+ ;
+ -- domainIndex: gives each distinct DomainAssignment accession a sequential integer.
+ -- Numbering follows accession order via the window's ORDER BY, and the derived table
+ -- is aliased because PostgreSQL before v16 rejects an unaliased subquery in FROM.
+ create table domainIndex as
+ select row_number() OVER (ORDER BY accession) as domain_index, accession
+ from (select distinct accession
+ from DomainAssignment
+ ) distinct_accession
+
+ ;
+
+ -- Index on DomainIndex for the accession -> domain_index lookup.
+ CREATE INDEX domainIdxIdx
+ ON DomainIndex (accession, domain_index)
+
+ ;
+ -- Set DomainAssignment.domain_index on every row from the DomainIndex entry with the
+ -- same accession (deliberately no WHERE clause: the whole table is updated).
+ UPDATE DomainAssignment da
+ SET domain_index = (
+ SELECT di.domain_index
+ FROM DomainIndex di
+ WHERE di.accession = da.accession
+ )
+ ;
+
+ -- Index on DomainAssignment led by domain_index.
+ CREATE INDEX domain_ix_ix
+ ON DomainAssignment (domain_index, accession, full_id)
+
+
+ ;
+
+ -- Index on DomainAssignment led by group_name.
+ CREATE INDEX domain_group_ix
+ ON DomainAssignment (group_name, accession, full_id)
+
+
+ ;
+
+ -- Index on DomainAssignment led by aa_sequence_id.
+ CREATE INDEX domain_seq_ix
+ ON DomainAssignment (aa_sequence_id, accession, full_id, group_name)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/EdaGeneGraph.psql b/Model/lib/psql/webtables/??/EdaGeneGraph.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/??/EupathBuildDates.psql b/Model/lib/psql/webtables/??/EupathBuildDates.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql b/Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql
new file mode 100644
index 0000000000..0b8856e4e5
--- /dev/null
+++ b/Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql
@@ -0,0 +1,46 @@
+ -- ExternalDbDatasetPresenter: pairs each external database release with the dataset
+ -- presenter(s) whose name equals the database name or whose name pattern LIKE-matches it.
+ CREATE TABLE ExternalDbDatasetPresenter AS
+ SELECT ed.external_database_id, ed.name AS external_database_name
+ , edr.external_database_release_id
+ , SUBSTR(edr.version, 1, 40) AS external_database_version
+ , dsp.dataset_presenter_id
+ , dsp.name AS dataset_presenter_name
+ , dsp.display_name AS dataset_presenter_display_name
+ FROM sres.externalDatabaseRelease edr
+ INNER JOIN sres.externalDatabase ed ON ed.external_database_id = edr.external_database_id
+ INNER JOIN DatasetPresenter dsp ON (ed.name = dsp.name OR ed.name LIKE dsp.dataset_name_pattern)
+ ORDER BY ed.name
+ ;
+
+ -- Multi-column index led by external_database_release_id.
+ CREATE INDEX edd_rlsidix ON ExternalDbDatasetPresenter (
+ external_database_release_id, external_database_id,
+ external_database_name, dataset_presenter_id,
+ dataset_presenter_name, dataset_presenter_display_name
+ )
+
+
+ ;
+
+ -- Multi-column index led by dataset_presenter_id.
+ CREATE INDEX edd_dsidix ON ExternalDbDatasetPresenter (
+ dataset_presenter_id, external_database_id,
+ external_database_release_id, external_database_name,
+ dataset_presenter_name, dataset_presenter_display_name
+ )
+
+
+ ;
+
+ -- Multi-column index led by dataset_presenter_name.
+ CREATE INDEX edd_dsnameix ON ExternalDbDatasetPresenter (
+ dataset_presenter_name, dataset_presenter_id,
+ external_database_id, external_database_release_id,
+ external_database_name, external_database_version,
+ dataset_presenter_display_name
+ )
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql b/Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql
new file mode 100644
index 0000000000..1dd9673b23
--- /dev/null
+++ b/Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql
@@ -0,0 +1,60 @@
+ -- ExternalSequenceTaxonRank: one wide row per starting taxon listing the scientific name
+ -- and taxon_id of its ancestor at each major rank ('N/A' name where no such ancestor).
+ CREATE TABLE ExternalSequenceTaxonRank AS
+ WITH organism_rank AS (
+ SELECT tn1.taxon_id as organism, tn2.name as parent_organism,
+ tn2.taxon_id as parent_organism_id, r.rank
+ FROM sres.TaxonName tn1, sres.TaxonName tn2,
+ (
+ WITH RECURSIVE cte AS ( -- seed: taxa used by external AA sequences or apidb.taxonstring
+ SELECT taxon_id as input, taxon_id, rank, parent_id
+ FROM sres.taxon
+ WHERE taxon_id IN (
+ SELECT taxon_id FROM dots.externalaasequence
+ UNION
+ SELECT taxon_id FROM apidb.taxonstring
+ )
+ UNION
+ SELECT cte.input, t.taxon_id, t.rank, t.parent_id -- step: climb to the parent, keeping the seed id
+ FROM sres.taxon t, cte
+ WHERE cte.parent_id = t.taxon_id
+ )
+ SELECT input, taxon_id, rank
+ FROM cte
+ ) r
+ WHERE r.input = tn1.taxon_id
+ and r.taxon_id = tn2.taxon_id
+ and tn1.name_class = 'scientific name'
+ and tn2.name_class = 'scientific name'
+ and r.rank in ('phylum', 'family','genus', 'species', 'superkingdom','kingdom', 'class', 'order')
+ )
+ SELECT organisms.organism,
+ coalesce(superkingdom.parent_organism, 'N/A') as superkingdom,
+ superkingdom.parent_organism_id as superkingdom_id,
+ coalesce(kingdom.parent_organism, 'N/A') as kingdom,
+ kingdom.parent_organism_id as kingdom_id,
+ coalesce (phylum.parent_organism, 'N/A') as phylum,
+ phylum.parent_organism_id as phylum_id,
+ coalesce (class.parent_organism, 'N/A') as class,
+ class.parent_organism_id as class_id,
+ coalesce (family.parent_organism, 'N/A') as family,
+ family.parent_organism_id as family_id,
+ coalesce (rank_order.parent_organism, 'N/A') as rank_order, -- the 'order' rank; ORDER itself is a reserved word
+ rank_order.parent_organism_id as rank_order_id,
+ coalesce ( genus.parent_organism, 'N/A') as genus,
+ genus.parent_organism_id as genus_id,
+ coalesce(species.parent_organism, 'N/A') as species,
+ species.parent_organism_id as species_id
+ FROM (SELECT DISTINCT organism FROM organism_rank) organisms -- one LEFT JOIN per rank pivots rows into columns
+ LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'phylum') phylum ON organisms.organism = phylum.organism
+ LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'genus') genus ON organisms.organism = genus.organism
+ LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'species') species ON organisms.organism = species.organism
+ LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'kingdom') kingdom ON organisms.organism = kingdom.organism
+ LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'superkingdom') superkingdom ON organisms.organism = superkingdom.organism
+ LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'class') class ON organisms.organism = class.organism
+ LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'family') family ON organisms.organism = family.organism
+ LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'order') rank_order ON organisms.organism = rank_order.organism
+ ORDER BY organism, species, genus
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/GeneGroupProfile.psql b/Model/lib/psql/webtables/??/GeneGroupProfile.psql
new file mode 100644
index 0000000000..e91fd35148
--- /dev/null
+++ b/Model/lib/psql/webtables/??/GeneGroupProfile.psql
@@ -0,0 +1,30 @@
+ -- GeneGroupProfile: maps a gene to profile graphs -- every gene with a Profile row maps to
+ -- itself, and same-species syntenic orthologs map to the profiled gene (profile_graph_id).
+ CREATE TABLE GeneGroupProfile AS
+ SELECT DISTINCT ortholog.source_id
+ , prof.dataset_name
+ , profiled.source_id AS profile_graph_id
+ FROM OrthologousTranscripts ot
+ INNER JOIN Profile prof ON prof.source_id = ot.source_id
+ INNER JOIN GeneAttributes profiled ON profiled.source_id = ot.source_id
+ INNER JOIN GeneAttributes ortholog ON ortholog.source_id = ot.ortho_gene_source_id
+ AND ortholog.species = profiled.species
+ WHERE ot.is_syntenic = 1
+ UNION
+ SELECT gene.source_id
+ , prof.dataset_name
+ , prof.source_id AS profile_graph_id
+ FROM Profile prof
+ INNER JOIN GeneAttributes gene ON gene.source_id = prof.source_id
+
+ ;
+
+ -- Index spanning all three GeneGroupProfile columns, led by source_id.
+ CREATE INDEX ggp_ix ON GeneGroupProfile (
+ source_id, dataset_name, profile_graph_id
+ )
+
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/GenomicSeqAttributes.psql b/Model/lib/psql/webtables/??/GenomicSeqAttributes.psql
new file mode 100644
index 0000000000..6922acea92
--- /dev/null
+++ b/Model/lib/psql/webtables/??/GenomicSeqAttributes.psql
@@ -0,0 +1,102 @@
+ -- :ORG_ABBREVGenomicSeqAttributes: attribute rows for genomic sequences (external and
+ -- virtual NA sequences of the listed sequence types): base counts, taxon, release, flags.
+ CREATE TABLE :ORG_ABBREVGenomicSeqAttributes AS
+ SELECT
+ cast(apidb.prefixed_project_id(tn.name, ':ORG_ABBREV') as varchar(20)) as project_id,
+ SUBSTR(sequence.source_id, 1, 60) AS source_id, sequence.a_count,
+ sequence.c_count, sequence.g_count, sequence.t_count,
+ (sequence.length
+ - (sequence.a_count + sequence.c_count + sequence.g_count + sequence.t_count))
+ AS other_count,
+ sequence.length,
+ to_char((sequence.a_count + sequence.t_count) * 100.0 / NULLIF(sequence.length, 0), '99.99')
+ AS at_percent, -- scale before dividing (no integer truncation); NULL instead of an error when length is 0
+ SUBSTR(tn.name, 1, 100) AS organism,
+ taxon.ncbi_tax_id,
+ taxon.taxon_id,
+ CASE WHEN sequence.description IS NULL THEN SUBSTR(tn.name, 1, 100)
+ ELSE SUBSTR(sequence.description, 1, 400)
+ END AS sequence_description,
+ SUBSTR(genbank.genbank_accession, 1, 20) AS genbank_accession,
+ SUBSTR(db.database_version, 1, 30) AS database_version, db.database_name,
+ SUBSTR(sequence.chromosome, 1, 20) AS chromosome,
+ sequence.external_database_release_id, sequence.sequence_ontology_id,
+ sequence.chromosome_order_num, so.source_id as so_id, so.name as sequence_type,
+ coalesce(virtualization.is_top_level, 1) as is_top_level,
+ sequence.na_sequence_id, organism.genome_source,
+ organism.name_for_filenames, coalesce(msa.has_msa, 0) as has_msa
+ FROM sres.Taxon LEFT JOIN apidb.Organism ON taxon.taxon_id = organism.taxon_id,
+ sres.OntologyTerm so,
+ ( SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description,
+ a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id
+ FROM dots.ExternalNaSequence
+ UNION
+ SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description,
+ a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id
+ FROM dots.VirtualSequence
+ ) sequence
+ LEFT JOIN
+ (SELECT drns.na_sequence_id, max(dr.primary_identifier) AS genbank_accession
+ FROM dots.dbrefNaSequence drns, sres.DbRef dr,
+ sres.ExternalDatabaseRelease gb_edr, sres.ExternalDatabase gb_ed
+ WHERE drns.db_ref_id = dr.db_ref_id
+ AND dr.external_database_release_id
+ = gb_edr.external_database_release_id
+ AND gb_edr.external_database_id = gb_ed.external_database_id
+ AND gb_ed.name = 'GenBank'
+ GROUP BY drns.na_sequence_id
+ ) genbank ON sequence.na_sequence_id = genbank.na_sequence_id
+ LEFT JOIN
+ (SELECT edr.external_database_release_id,
+ edr.version AS database_version, ed.name AS database_name
+ FROM sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+ WHERE edr.external_database_id = ed.external_database_id
+ ) db ON sequence.external_database_release_id = db.external_database_release_id
+ LEFT JOIN
+ (SELECT distinct piece_na_sequence_id, 0 as is_top_level
+ FROM :ORG_ABBREVSequencePieceClosure
+ ) virtualization ON sequence.na_sequence_id = virtualization.piece_na_sequence_id
+ LEFT JOIN
+ (SELECT a_na_sequence_id as na_sequence_id, 1 as has_msa
+ FROM apidb.Synteny syn
+ GROUP BY a_na_sequence_id
+ ) msa ON sequence.na_sequence_id = msa.na_sequence_id
+ LEFT JOIN
+ (SELECT taxon_id, max(name) as name
+ FROM sres.TaxonName
+ WHERE name_class = 'scientific name'
+ GROUP BY taxon_id
+ ) tn ON sequence.taxon_id = tn.taxon_id
+ WHERE
+ sequence.taxon_id = taxon.taxon_id
+ AND sequence.sequence_ontology_id = so.ontology_term_id
+ AND (sequence.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0) -- an empty value disables the taxon filter
+ AND so.name IN ('random_sequence', 'chromosome', 'contig', 'supercontig','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle')
+ ORDER BY organism, source_id
+
+ ;
+
+ -- Index names carry the :ORG_ABBREV prefix, like the table itself, so that building the
+ -- table for a second organism in the same schema does not hit a duplicate index name.
+ create unique index :ORG_ABBREVpk_SeqAttr_ ON :ORG_ABBREVGenomicSeqAttributes (lower(source_id), project_id)
+
+ ;
+
+
+
+ create unique index :ORG_ABBREVSeqAttr_source_id ON :ORG_ABBREVGenomicSeqAttributes (source_id)
+
+ ;
+
+
+
+ create unique index :ORG_ABBREVSeqAttr_naseqid ON :ORG_ABBREVGenomicSeqAttributes (na_sequence_id)
+
+ ;
+
+
+
+ create unique index :ORG_ABBREVSeqAttr_taxsrc_id ON :ORG_ABBREVGenomicSeqAttributes (taxon_id, source_id)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql b/Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql
new file mode 100644
index 0000000000..a7d484850c
--- /dev/null
+++ b/Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql
@@ -0,0 +1,20 @@
+
+ -- Pairs each orthomcl_name with the profile_string of the group's gene that has the greatest source_id.
+ CREATE TABLE GroupPhylogeneticProfile AS
+ SELECT representative.orthomcl_name, prof.profile_string
+ FROM (SELECT orthomcl_name, max(source_id) AS source_id
+       FROM GeneAttributes
+       GROUP BY orthomcl_name) representative
+ INNER JOIN apidb.PhylogeneticProfile prof
+   ON prof.source_id = representative.source_id
+
+ ;
+
+
+ -- Index for looking up GroupPhylogeneticProfile rows by orthomcl_name.
+ CREATE INDEX group_pp_ix
+ ON GroupPhylogeneticProfile
+ (orthomcl_name)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/OrthologousTranscripts.psql b/Model/lib/psql/webtables/??/OrthologousTranscripts.psql
new file mode 100644
index 0000000000..d6e7191d1e
--- /dev/null
+++ b/Model/lib/psql/webtables/??/OrthologousTranscripts.psql
@@ -0,0 +1,73 @@
+
+ -- Distinct (gene, syntenic gene) pairs: SyntenicGene rows on the gene's sequence whose span overlaps the gene's span.
+ CREATE UNLOGGED TABLE SyntenicPairs AS
+ SELECT DISTINCT gene.na_feature_id, syn.syn_na_feature_id
+ FROM GeneAttributes gene
+ INNER JOIN apidb.SyntenicGene syn
+   ON syn.na_sequence_id = gene.na_sequence_id
+  AND syn.start_min <= gene.end_max
+  AND syn.end_max >= gene.start_min
+ ;
+
+
+ -- Index on both columns of SyntenicPairs, gene first.
+ CREATE INDEX SynPair_idx
+ ON SyntenicPairs
+ (na_feature_id, syn_na_feature_id)
+
+ ;
+
+
+ -- Each gene paired with every transcript that shares its orthomcl_name (all_pairs, with the transcript's
+ -- apidb.Organism row), plus is_syntenic = 1 when SyntenicPairs holds the (gene, ortholog gene) pair, else 0.
+ CREATE TABLE OrthologousTranscripts AS
+ WITH all_pairs AS (
+   SELECT gene.source_id,
+          gene.project_id,
+          gene.na_feature_id,
+          ortho.source_id AS ortho_source_id,
+          ortho.gene_source_id AS ortho_gene_source_id,
+          ortho.project_id AS ortho_project_id,
+          ortho.gene_na_feature_id AS ortho_na_feature_id,
+          ortho.transcript_product AS ortho_product,
+          ortho.protein_length,
+          -- NOTE(review): ortho_name is read from the query gene, unlike the other ortho_* columns - confirm intended
+          gene.name AS ortho_name,
+          ortho.organism AS ortho_organism,
+          ortho.taxon_id AS ortho_taxon_id,
+          org.is_reference_strain
+   FROM GeneAttributes gene
+   INNER JOIN TranscriptAttributes ortho
+     ON ortho.orthomcl_name = gene.orthomcl_name
+   INNER JOIN apidb.Organism org
+     ON org.taxon_id = ortho.taxon_id
+ )
+ SELECT all_pairs.*,
+        CASE WHEN syn.na_feature_id IS NULL THEN 0 ELSE 1 END AS is_syntenic
+ FROM all_pairs
+ LEFT JOIN SyntenicPairs syn
+   ON syn.na_feature_id = all_pairs.na_feature_id
+  AND syn.syn_na_feature_id = all_pairs.ortho_na_feature_id
+
+
+ ;
+
+
+ -- Wide index leading with (source_id, project_id), then is_syntenic in descending order.
+ CREATE INDEX ot_idx ON OrthologousTranscripts
+   (source_id, project_id, is_syntenic DESC,
+    ortho_source_id, ortho_project_id, ortho_gene_source_id,
+    ortho_product, ortho_name, ortho_organism,
+    ortho_taxon_id, is_reference_strain)
+
+ ;
+
+
+ -- Narrower index leading with (is_syntenic, ortho_taxon_id).
+ CREATE INDEX ot_smol_idx ON OrthologousTranscripts
+   (is_syntenic, ortho_taxon_id, source_id,
+    ortho_source_id, ortho_project_id, ortho_gene_source_id)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/PANExtDbRls.psql b/Model/lib/psql/webtables/??/PANExtDbRls.psql
new file mode 100644
index 0000000000..82b469f127
--- /dev/null
+++ b/Model/lib/psql/webtables/??/PANExtDbRls.psql
@@ -0,0 +1,36 @@
+
+ -- Protocol app node -> external database release and database name, reached through node sets or directly.
+ CREATE TABLE :ORG_ABBREVPANExtDbRls AS
+ WITH node_dataset AS (
+   -- nodes that are members of a node set carrying an external database release
+   SELECT link.protocol_app_node_id,
+          nset.external_database_release_id,
+          db.name
+   FROM study.nodeSet nset
+   INNER JOIN study.nodeNodeSet link
+     ON link.node_set_id = nset.node_set_id
+   INNER JOIN sres.externaldatabaserelease rls
+     ON rls.external_database_release_id = nset.external_database_release_id
+   INNER JOIN sres.externaldatabase db
+     ON db.external_database_id = rls.external_database_id
+   WHERE nset.external_database_release_id IS NOT NULL
+   UNION
+   -- nodes that carry an external database release themselves
+   SELECT pan.protocol_app_node_id,
+          pan.external_database_release_id,
+          db.name
+   FROM study.protocolappnode pan
+   INNER JOIN sres.externaldatabaserelease rls
+     ON rls.external_database_release_id = pan.external_database_release_id
+   INNER JOIN sres.externaldatabase db
+     ON db.external_database_id = rls.external_database_id
+   WHERE pan.external_database_release_id IS NOT NULL
+ )
+ SELECT DISTINCT protocol_app_node_id AS pan_id, external_database_release_id, name AS dataset_name
+ FROM node_dataset
+ -- NOTE(review): the database name is compared with a macro named after a taxon id - confirm the intended filter value
+ WHERE (name = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
+ ORDER BY external_database_release_id, protocol_app_node_id
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/PANIO.psql b/Model/lib/psql/webtables/??/PANIO.psql
new file mode 100644
index 0000000000..fa820f9d4b
--- /dev/null
+++ b/Model/lib/psql/webtables/??/PANIO.psql
@@ -0,0 +1,59 @@
+
+ -- Distinct input/output protocol app node pairs per protocol application, kept when the input node is in PANExtDbRls.
+ CREATE TABLE :ORG_ABBREVPANIO AS
+ SELECT DISTINCT io.*
+ FROM (
+   SELECT inp.protocol_app_node_id AS input_pan_id,
+          app.protocol_app_id,
+          outp.protocol_app_node_id AS output_pan_id,
+          -- ontology terms of the node types; the term names (in_type.name, out_type.name) are not selected
+          in_type.source_id AS input_pan_type_source_id,
+          in_type.ontology_term_id AS input_pan_type_id,
+          out_type.source_id AS output_pan_type_source_id,
+          out_type.ontology_term_id AS output_pan_type_id
+   FROM study.ProtocolApp app
+   INNER JOIN study.Input inp ON inp.protocol_app_id = app.protocol_app_id
+   INNER JOIN study.Output outp ON outp.protocol_app_id = app.protocol_app_id
+   INNER JOIN study.ProtocolAppNode in_pan ON in_pan.protocol_app_node_id = inp.protocol_app_node_id
+   INNER JOIN study.ProtocolAppNode out_pan ON out_pan.protocol_app_node_id = outp.protocol_app_node_id
+   LEFT JOIN sres.OntologyTerm in_type ON in_type.ontology_term_id = in_pan.type_id
+   LEFT JOIN sres.OntologyTerm out_type ON out_type.ontology_term_id = out_pan.type_id
+ ) io
+ INNER JOIN :ORG_ABBREVpanextdbrls panExtDbRls
+   ON panExtDbRls.pan_id = io.input_pan_id -- the input and outputs will have same dataset in prefix enabled mode only
+ WHERE (panExtDbRls.dataset_name = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
+ ORDER BY io.input_pan_id, io.output_pan_id
+ ;
+
+
+ -- Leads with the input node id.
+ CREATE INDEX :ORG_ABBREVpainio2_iix
+ ON :ORG_ABBREVPANIO
+ (input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
+
+ ;
+
+
+ -- Leads with the output node id.
+ CREATE INDEX :ORG_ABBREVpainio2_oix
+ ON :ORG_ABBREVPANIO
+ (output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
+
+ ;
+
+
+ -- Leads with the output node type source id.
+ CREATE INDEX :ORG_ABBREVpainio2_otypeix
+ ON :ORG_ABBREVPANIO
+ (output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id)
+
+ ;
+
+
+ -- Leads with the input node type source id.
+ CREATE INDEX :ORG_ABBREVpainio2_itypeix
+ ON :ORG_ABBREVPANIO
+ (input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/PANResults.psql b/Model/lib/psql/webtables/??/PANResults.psql
new file mode 100644
index 0000000000..de67af4813
--- /dev/null
+++ b/Model/lib/psql/webtables/??/PANResults.psql
@@ -0,0 +1,91 @@
+
+ -- For each protocol app node in PANExtDbRls, one row per result table that holds rows for that node.
+ CREATE TABLE :ORG_ABBREVPANResults AS
+ SELECT DISTINCT r.*
+ FROM (
+ SELECT pan.protocol_app_node_id AS pan_id, 'Results::NAFeatureDiffResult' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.NAFeatureDiffResult res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::ReporterIntensity' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.ReporterIntensity res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::SegmentResult' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.SegmentResult res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::CompoundMassSpec' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.CompoundMassSpec res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::NaFeatureHostResponse' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.NaFeatureHostResponse res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'ApiDB::ChrCopyNumber' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM ApiDB.ChrCopyNumber res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'ApiDB::GeneCopyNumber' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM ApiDB.GeneCopyNumber res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::NAFeatureExpression' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.NAFeatureExpression res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::EditingEvent' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.EditingEvent res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::FamilyDiffResult' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.FamilyDiffResult res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::FamilyExpression' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.FamilyExpression res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::GeneDiffResult' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.GeneDiffResult res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::GeneExpression' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.GeneExpression res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::GeneSimilarity' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.GeneSimilarity res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::ReporterDiffResult' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.ReporterDiffResult res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::ReporterExpression' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.ReporterExpression res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::RnaDiffResult' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.RnaDiffResult res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::RnaExpression' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.RnaExpression res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::LineageAbundance' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.LineageAbundance res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::SegmentDiffResult' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.SegmentDiffResult res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'Results::SeqVariation' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM Results.SeqVariation res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'ApiDB::SequenceVariation' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM study.ProtocolAppNode named WHERE named.protocol_app_node_id = pan.protocol_app_node_id AND named.name LIKE '% (Sequence Variation)')
+ UNION
+ SELECT pan.protocol_app_node_id, 'ApiDB::MassSpecSummary' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM apidb.MASSSPECSUMMARY res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'ApiDB::IntronJunction' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM apidb.IntronJunction res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'ApiDB::RflpGenotype' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM apidb.RflpGenotype res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'ApiDB::RflpGenotypeNumber' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM apidb.RflpGenotypeNumber res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ UNION
+ SELECT pan.protocol_app_node_id, 'ApiDB::CrisprPhenotype' AS result_table FROM study.ProtocolAppNode pan
+ WHERE EXISTS (SELECT 1 FROM apidb.crisprphenotype res WHERE res.protocol_app_node_id = pan.protocol_app_node_id)
+ ) r
+ INNER JOIN :ORG_ABBREVpanextdbrls panExtDbRls ON panExtDbRls.pan_id = r.pan_id
+ WHERE (panExtDbRls.dataset_name = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/PhyleticPattern.psql b/Model/lib/psql/webtables/??/PhyleticPattern.psql
new file mode 100644
index 0000000000..6b5f20ca3c
--- /dev/null
+++ b/Model/lib/psql/webtables/??/PhyleticPattern.psql
@@ -0,0 +1,395 @@
+
+-- One row per ortholog group: *_actual is summed from ApiDB.OrthologGroupTaxon.number_of_taxa, *_total is counted from apidb.OrthomclClade, *_percent = round(100*actual/total), NULL when total is 0.
+CREATE TABLE PhyleticPattern AS
+ (SELECT actual.group_name,
+ actual.alveolata as alveolata_actual,
+ total.alveolata as alveolata_total,
+ round(100*actual.alveolata/NULLIF(total.alveolata,0),0) AS alveolata_percent, -- NULLIF: a zero total gives NULL rather than a division-by-zero error
+ actual.archaea as archaea_actual,
+ total.archaea as archaea_total,
+ round(100*actual.archaea/NULLIF(total.archaea,0),0) AS archaea_percent,
+ actual.amoeba as amoeba_actual,
+ total.amoeba as amoeba_total,
+ round(100*actual.amoeba/NULLIF(total.amoeba,0),0) AS amoeba_percent,
+ actual.bacteria as bacteria_actual,
+ total.bacteria as bacteria_total,
+ round(100*actual.bacteria/NULLIF(total.bacteria,0),0) AS bacteria_percent,
+ actual.fungi as fungi_actual,
+ total.fungi as fungi_total,
+ round(100*actual.fungi/NULLIF(total.fungi,0),0) AS fungi_percent,
+ actual.euglenozoa as euglenozoa_actual,
+ total.euglenozoa as euglenozoa_total,
+ round(100*actual.euglenozoa/NULLIF(total.euglenozoa,0),0) AS euglenozoa_percent,
+ actual.metazoa as metazoa_actual,
+ total.metazoa as metazoa_total,
+ round(100*actual.metazoa/NULLIF(total.metazoa,0),0) AS metazoa_percent,
+ actual.viridiplantae as viridiplantae_actual,
+ total.viridiplantae as viridiplantae_total,
+ round(100*actual.viridiplantae/NULLIF(total.viridiplantae,0),0) AS viridiplantae_percent,
+ actual.other_eukaryotes as other_eukaryotes_actual,
+ total.other_eukaryotes as other_eukaryotes_total,
+ round(100*actual.other_eukaryotes/NULLIF(total.other_eukaryotes,0),0) AS other_eukaryotes_percent,
+ actual.bacteria_firm as bacteria_firm_actual,
+ total.bacteria_firm as bacteria_firm_total,
+ actual.bacteria_proa as bacteria_proa_actual,
+ total.bacteria_proa as bacteria_proa_total,
+ actual.bacteria_prob as bacteria_prob_actual,
+ total.bacteria_prob as bacteria_prob_total,
+ actual.bacteria_prod as bacteria_prod_actual,
+ total.bacteria_prod as bacteria_prod_total,
+ actual.bacteria_proe as bacteria_proe_actual,
+ total.bacteria_proe as bacteria_proe_total,
+ actual.bacteria_prog as bacteria_prog_actual,
+ total.bacteria_prog as bacteria_prog_total,
+ actual.bacteria_obac as bacteria_obac_actual,
+ total.bacteria_obac as bacteria_obac_total,
+ actual.archaea_arch as archaea_arch_actual,
+ total.archaea_arch as archaea_arch_total,
+ actual.archaea_eury as archaea_eury_actual,
+ total.archaea_eury as archaea_eury_total,
+ actual.archaea_cren as archaea_cren_actual,
+ total.archaea_cren as archaea_cren_total,
+ actual.archaea_nano as archaea_nano_actual,
+ total.archaea_nano as archaea_nano_total,
+ actual.archaea_kora as archaea_kora_actual,
+ total.archaea_kora as archaea_kora_total,
+ --oeuk_genera.html as oeuk_genera_html,
+ --eugl_genera.html as eugl_genera_html,
+ --amoe_genera.html as amoe_genera_html,
+ actual.alveolata_alve as alveolata_alve_actual,
+ total.alveolata_alve as alveolata_alve_total,
+ actual.alveolata_cili as alveolata_cili_actual,
+ total.alveolata_cili as alveolata_cili_total,
+ actual.alveolata_apic as alveolata_apic_actual,
+ total.alveolata_apic as alveolata_apic_total,
+ actual.alveolata_cocc as alveolata_cocc_actual,
+ total.alveolata_cocc as alveolata_cocc_total,
+ actual.alveolata_haem as alveolata_haem_actual,
+ total.alveolata_haem as alveolata_haem_total,
+ actual.alveolata_piro as alveolata_piro_actual,
+ total.alveolata_piro as alveolata_piro_total,
+ actual.viridiplantae_stre as viridiplantae_stre_actual,
+ total.viridiplantae_stre as viridiplantae_stre_total,
+ actual.viridiplantae_chlo as viridiplantae_chlo_actual,
+ total.viridiplantae_chlo as viridiplantae_chlo_total,
+ actual.viridiplantae_rhod as viridiplantae_rhod_actual,
+ total.viridiplantae_rhod as viridiplantae_rhod_total,
+ actual.viridiplantae_cryp as viridiplantae_cryp_actual,
+ total.viridiplantae_cryp as viridiplantae_cryp_total,
+ actual.fungi_fung as fungi_fung_actual,
+ total.fungi_fung as fungi_fung_total,
+ actual.fungi_micr as fungi_micr_actual,
+ total.fungi_micr as fungi_micr_total,
+ actual.fungi_basi as fungi_basi_actual,
+ total.fungi_basi as fungi_basi_total,
+ actual.fungi_asco as fungi_asco_actual,
+ total.fungi_asco as fungi_asco_total,
+ actual.fungi_muco as fungi_muco_actual,
+ total.fungi_muco as fungi_muco_total,
+ actual.fungi_chyt as fungi_chyt_actual,
+ total.fungi_chyt as fungi_chyt_total,
+ actual.metazoa_omet as metazoa_omet_actual,
+ total.metazoa_omet as metazoa_omet_total,
+ actual.metazoa_nema as metazoa_nema_actual,
+ total.metazoa_nema as metazoa_nema_total,
+ actual.metazoa_arth as metazoa_arth_actual,
+ total.metazoa_arth as metazoa_arth_total,
+ actual.metazoa_chor as metazoa_chor_actual,
+ total.metazoa_chor as metazoa_chor_total,
+ actual.metazoa_acti as metazoa_acti_actual,
+ total.metazoa_acti as metazoa_acti_total,
+ actual.metazoa_aves as metazoa_aves_actual,
+ total.metazoa_aves as metazoa_aves_total,
+ actual.metazoa_mamm as metazoa_mamm_actual,
+ total.metazoa_mamm as metazoa_mamm_total,
+ actual.metazoa_tuni as metazoa_tuni_actual,
+ total.metazoa_tuni as metazoa_tuni_total
+ FROM -- total is a single aggregate row; the comma join below pairs it with every row of actual
+ (SELECT SUM(CASE clade WHEN 'alveolata' THEN num ELSE 0 END) as alveolata,
+ SUM(CASE clade WHEN 'archaea' THEN num ELSE 0 END) as archaea,
+ SUM(CASE clade WHEN 'amoeba' THEN num ELSE 0 END) as amoeba,
+ SUM(CASE clade WHEN 'bacteria' THEN num ELSE 0 END) as bacteria,
+ SUM(CASE clade WHEN 'fungi' THEN num ELSE 0 END) as fungi,
+ SUM(CASE clade WHEN 'euglenozoa' THEN num ELSE 0 END) as euglenozoa,
+ SUM(CASE clade WHEN 'metazoa' THEN num ELSE 0 END) as metazoa,
+ SUM(CASE clade WHEN 'viridiplantae' THEN num ELSE 0 END) as viridiplantae,
+ SUM(CASE clade WHEN 'other_eukaryotes' THEN num ELSE 0 END) as other_eukaryotes,
+ SUM(CASE clade WHEN 'FIRM' THEN num ELSE 0 END) as bacteria_firm,
+ SUM(CASE clade WHEN 'PROA' THEN num ELSE 0 END) as bacteria_proa,
+ SUM(CASE clade WHEN 'PROB' THEN num ELSE 0 END) as bacteria_prob,
+ SUM(CASE clade WHEN 'PROD' THEN num ELSE 0 END) as bacteria_prod,
+ SUM(CASE clade WHEN 'PROE' THEN num ELSE 0 END) as bacteria_proe,
+ SUM(CASE clade WHEN 'PROG' THEN num ELSE 0 END) as bacteria_prog,
+ SUM(CASE clade WHEN 'OBAC' THEN num ELSE 0 END) as bacteria_obac,
+ SUM(CASE clade WHEN 'ARCH' THEN num ELSE 0 END) as archaea_arch,
+ SUM(CASE clade WHEN 'EURY' THEN num ELSE 0 END) as archaea_eury,
+ SUM(CASE clade WHEN 'CREN' THEN num ELSE 0 END) as archaea_cren,
+ SUM(CASE clade WHEN 'NANO' THEN num ELSE 0 END) as archaea_nano,
+ SUM(CASE clade WHEN 'KORA' THEN num ELSE 0 END) as archaea_kora,
+ SUM(CASE clade WHEN 'ALVE' THEN num ELSE 0 END) as alveolata_alve,
+ SUM(CASE clade WHEN 'CILI' THEN num ELSE 0 END) as alveolata_cili,
+ SUM(CASE clade WHEN 'APIC' THEN num ELSE 0 END) as alveolata_apic,
+ SUM(CASE clade WHEN 'COCC' THEN num ELSE 0 END) as alveolata_cocc,
+ SUM(CASE clade WHEN 'HAEM' THEN num ELSE 0 END) as alveolata_haem,
+ SUM(CASE clade WHEN 'PIRO' THEN num ELSE 0 END) as alveolata_piro,
+ SUM(CASE clade WHEN 'STRE' THEN num ELSE 0 END) as viridiplantae_stre,
+ SUM(CASE clade WHEN 'CHLO' THEN num ELSE 0 END) as viridiplantae_chlo,
+ SUM(CASE clade WHEN 'RHOD' THEN num ELSE 0 END) as viridiplantae_rhod,
+ SUM(CASE clade WHEN 'CRYP' THEN num ELSE 0 END) as viridiplantae_cryp,
+ SUM(CASE clade WHEN 'FUNG' THEN num ELSE 0 END) as fungi_fung,
+ SUM(CASE clade WHEN 'MICR' THEN num ELSE 0 END) as fungi_micr,
+ SUM(CASE clade WHEN 'BASI' THEN num ELSE 0 END) as fungi_basi,
+ SUM(CASE clade WHEN 'ASCO' THEN num ELSE 0 END) as fungi_asco,
+ SUM(CASE clade WHEN 'MUCO' THEN num ELSE 0 END) as fungi_muco,
+ SUM(CASE clade WHEN 'CHYT' THEN num ELSE 0 END) as fungi_chyt,
+ SUM(CASE clade WHEN 'OMET' THEN num ELSE 0 END) as metazoa_omet,
+ SUM(CASE clade WHEN 'NEMA' THEN num ELSE 0 END) as metazoa_nema,
+ SUM(CASE clade WHEN 'ARTH' THEN num ELSE 0 END) as metazoa_arth,
+ SUM(CASE clade WHEN 'CHOR' THEN num ELSE 0 END) as metazoa_chor,
+ SUM(CASE clade WHEN 'ACTI' THEN num ELSE 0 END) as metazoa_acti,
+ SUM(CASE clade WHEN 'AVES' THEN num ELSE 0 END) as metazoa_aves,
+ SUM(CASE clade WHEN 'MAMM' THEN num ELSE 0 END) as metazoa_mamm,
+ SUM(CASE clade WHEN 'TUNI' THEN num ELSE 0 END) as metazoa_tuni
+ FROM ( -- (clade, num) rows: a subtree count for each of nine named clades, then a child count per parent clade abbreviation
+ (WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'BACT'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'bacteria' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'ARCH'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'archaea' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'ALVE'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'alveolata' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'AMOE'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'amoeba' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'EUGL'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'euglenozoa' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'VIRI'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'viridiplantae' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'FUNG'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'fungi' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'META'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'metazoa' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ WITH RECURSIVE TaxonHierarchy AS (
+ SELECT orthomcl_clade_id, parent_id
+ FROM apidb.OrthomclClade
+ WHERE orthomcl_clade_id IN (
+ SELECT orthomcl_clade_id
+ FROM apidb.OrthomclClade
+ WHERE three_letter_abbrev = 'OEUK'
+ ) UNION ALL
+ SELECT child.orthomcl_clade_id,child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT 'other_eukaryotes' AS clade, COUNT(*) AS num
+ FROM TaxonHierarchy th
+ JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id
+ WHERE c.core_peripheral IN ('C', 'P')
+
+ ) UNION (
+ SELECT b.three_letter_abbrev AS clade,a.num
+ FROM (SELECT parent_id, COUNT(orthomcl_clade_id) AS num
+ FROM apidb.OrthomclClade
+ WHERE core_peripheral in ('C','P')
+ GROUP BY parent_id) a,
+ apidb.OrthomclClade b
+ WHERE a.parent_id = b.orthomcl_clade_id
+ )
+ ) clade_counts) total, -- the inner FROM subquery is aliased because PostgreSQL before version 16 rejects an unaliased one
+ (SELECT name as group_name,
+ SUM(CASE three_letter_abbrev WHEN 'ALVE' THEN number_of_taxa ELSE 0 END) as alveolata,
+ SUM(CASE three_letter_abbrev WHEN 'ARCH' THEN number_of_taxa ELSE 0 END) as archaea,
+ SUM(CASE three_letter_abbrev WHEN 'AMOE' THEN number_of_taxa ELSE 0 END) as amoeba,
+ SUM(CASE three_letter_abbrev WHEN 'BACT' THEN number_of_taxa ELSE 0 END) as bacteria,
+ SUM(CASE three_letter_abbrev WHEN 'FUNG' THEN number_of_taxa ELSE 0 END) as fungi,
+ SUM(CASE three_letter_abbrev WHEN 'EUGL' THEN number_of_taxa ELSE 0 END) as euglenozoa,
+ SUM(CASE three_letter_abbrev WHEN 'META' THEN number_of_taxa ELSE 0 END) as metazoa,
+ SUM(CASE three_letter_abbrev WHEN 'VIRI' THEN number_of_taxa ELSE 0 END) as viridiplantae,
+ SUM(CASE three_letter_abbrev WHEN 'OEUK' THEN number_of_taxa ELSE 0 END) as other_eukaryotes,
+ SUM(CASE three_letter_abbrev WHEN 'FIRM' THEN number_of_taxa ELSE 0 END) as bacteria_firm,
+ SUM(CASE three_letter_abbrev WHEN 'PROA' THEN number_of_taxa ELSE 0 END) as bacteria_proa,
+ SUM(CASE three_letter_abbrev WHEN 'PROB' THEN number_of_taxa ELSE 0 END) as bacteria_prob,
+ SUM(CASE three_letter_abbrev WHEN 'PROD' THEN number_of_taxa ELSE 0 END) as bacteria_prod,
+ SUM(CASE three_letter_abbrev WHEN 'PROE' THEN number_of_taxa ELSE 0 END) as bacteria_proe,
+ SUM(CASE three_letter_abbrev WHEN 'PROG' THEN number_of_taxa ELSE 0 END) as bacteria_prog,
+ SUM(CASE three_letter_abbrev WHEN 'OBAC' THEN number_of_taxa ELSE 0 END) as bacteria_obac,
+ SUM(CASE three_letter_abbrev WHEN 'ARCH' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'EURY' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'CREN' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'NANO' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'KORA' THEN number_of_taxa ELSE 0 END) as archaea_arch,
+ SUM(CASE three_letter_abbrev WHEN 'EURY' THEN number_of_taxa ELSE 0 END) as archaea_eury,
+ SUM(CASE three_letter_abbrev WHEN 'CREN' THEN number_of_taxa ELSE 0 END) as archaea_cren,
+ SUM(CASE three_letter_abbrev WHEN 'NANO' THEN number_of_taxa ELSE 0 END) as archaea_nano,
+ SUM(CASE three_letter_abbrev WHEN 'KORA' THEN number_of_taxa ELSE 0 END) as archaea_kora,
+ SUM(CASE three_letter_abbrev WHEN 'ALVE' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'CILI' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'APIC' THEN number_of_taxa ELSE 0 END) as alveolata_alve,
+ SUM(CASE three_letter_abbrev WHEN 'CILI' THEN number_of_taxa ELSE 0 END) as alveolata_cili,
+ SUM(CASE three_letter_abbrev WHEN 'APIC' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'COCC' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'ACON' THEN number_of_taxa ELSE 0 END) as alveolata_apic,
+ SUM(CASE three_letter_abbrev WHEN 'COCC' THEN number_of_taxa ELSE 0 END) as alveolata_cocc,
+ SUM(CASE three_letter_abbrev WHEN 'HAEM' THEN number_of_taxa ELSE 0 END) as alveolata_haem,
+ SUM(CASE three_letter_abbrev WHEN 'PIRO' THEN number_of_taxa ELSE 0 END) as alveolata_piro,
+ SUM(CASE three_letter_abbrev WHEN 'STRE' THEN number_of_taxa ELSE 0 END) as viridiplantae_stre,
+ SUM(CASE three_letter_abbrev WHEN 'CHLO' THEN number_of_taxa ELSE 0 END) as viridiplantae_chlo,
+ SUM(CASE three_letter_abbrev WHEN 'RHOD' THEN number_of_taxa ELSE 0 END) as viridiplantae_rhod,
+ SUM(CASE three_letter_abbrev WHEN 'CRYP' THEN number_of_taxa ELSE 0 END) as viridiplantae_cryp,
+ SUM(CASE three_letter_abbrev WHEN 'FUNG' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'MICR' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'BASI' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'ASCO' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'MUCO' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'CHYT' THEN number_of_taxa ELSE 0 END) as fungi_fung,
+ SUM(CASE three_letter_abbrev WHEN 'MICR' THEN number_of_taxa ELSE 0 END) as fungi_micr,
+ SUM(CASE three_letter_abbrev WHEN 'BASI' THEN number_of_taxa ELSE 0 END) as fungi_basi,
+ SUM(CASE three_letter_abbrev WHEN 'ASCO' THEN number_of_taxa ELSE 0 END) as fungi_asco,
+ SUM(CASE three_letter_abbrev WHEN 'MUCO' THEN number_of_taxa ELSE 0 END) as fungi_muco,
+ SUM(CASE three_letter_abbrev WHEN 'CHYT' THEN number_of_taxa ELSE 0 END) as fungi_chyt,
+ SUM(CASE three_letter_abbrev WHEN 'OMET' THEN number_of_taxa ELSE 0 END) as metazoa_omet,
+ SUM(CASE three_letter_abbrev WHEN 'NEMA' THEN number_of_taxa ELSE 0 END) as metazoa_nema,
+ SUM(CASE three_letter_abbrev WHEN 'ARTH' THEN number_of_taxa ELSE 0 END) as metazoa_arth,
+ SUM(CASE three_letter_abbrev WHEN 'CHOR' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'ACTI' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'AVES' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'MAMM' THEN number_of_taxa ELSE 0 END) -
+ SUM(CASE three_letter_abbrev WHEN 'TUNI' THEN number_of_taxa ELSE 0 END) as metazoa_chor,
+ SUM(CASE three_letter_abbrev WHEN 'ACTI' THEN number_of_taxa ELSE 0 END) as metazoa_acti,
+ SUM(CASE three_letter_abbrev WHEN 'AVES' THEN number_of_taxa ELSE 0 END) as metazoa_aves,
+ SUM(CASE three_letter_abbrev WHEN 'MAMM' THEN number_of_taxa ELSE 0 END) as metazoa_mamm,
+ SUM(CASE three_letter_abbrev WHEN 'TUNI' THEN number_of_taxa ELSE 0 END) as metazoa_tuni
+ FROM (SELECT og.group_id AS name,agt.three_letter_abbrev,agt.number_of_taxa::numeric
+ FROM ApiDB.OrthologGroupTaxon agt, apidb.orthologgroup og
+ WHERE agt.group_id = og.group_id
+ AND agt.three_letter_abbrev = UPPER(agt.three_letter_abbrev)) taxon_counts -- keeps only all-upper-case abbreviations; alias needed before PostgreSQL 16
+ GROUP BY name) actual
+)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/ProjectTaxon.psql b/Model/lib/psql/webtables/??/ProjectTaxon.psql
new file mode 100644
index 0000000000..dd3d4ee56b
--- /dev/null
+++ b/Model/lib/psql/webtables/??/ProjectTaxon.psql
@@ -0,0 +1,151 @@
+ -- ProjectTaxon maps a lower-cased taxon name to a project name. Names are derived from taxa of
+ -- dots.ExternalNaSequence rows and from apidb.Organism, then a fixed list of extra names is appended.
+ CREATE TABLE :ORG_ABBREVProjectTaxon AS
+ WITH
+ local_taxon -- a taxon found in this instance, either in dots.ExternalNaSequence or in apidb.Organism
+ AS ( SELECT distinct tn.name as taxon,
+ substr((tn.name), 1, position(' ' IN tn.name||' ') - 1) as first_word,
+ pi.name as project_id
+ FROM dots.ExternalNaSequence ens, sres.TaxonName tn, core.ProjectInfo pi
+ WHERE ens.taxon_id = tn.taxon_id
+ and ((tn.name not like 'Bodo %' and tn.name not like 'Drosophila %')
+ OR tn.name_class = 'scientific name')
+ and ens.row_project_id = pi.project_id
+ -- get names from apidb.Organism.family_name_for_files
+ -- (may not be necessary)
+ UNION
+ SELECT family_name_for_files as taxon,
+ substr((family_name_for_files), 1, position(' ' IN family_name_for_files||' ') - 1) as first_word,
+ project_name as project_id
+ FROM apidb.Organism
+ WHERE family_name_for_files in (select name from sres.TaxonName)
+ ),
+ mononym -- a taxon name that's the first word of a local taxon
+ AS (SELECT distinct lower(lt.first_word) as taxon, lt.project_id
+ FROM local_taxon lt, sres.TaxonName tn
+ WHERE lt.first_word = tn.name
+ -- and tn.name_class = 'scientific name'
+ ),
+ full_name -- the full name of a local taxon whose first name is not a taxon
+ AS (SELECT distinct lower(lt.taxon) as taxon, lt.project_id
+ FROM local_taxon lt, sres.TaxonName tn
+ WHERE lt.taxon = tn.name
+ -- and tn.name_class = 'scientific name'
+ and lower(lt.first_word) not in (select taxon from mononym))
+ SELECT * FROM mononym
+ UNION
+ SELECT * FROM full_name
+ UNION
+ SELECT 'hypocrea', 'FungiDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/afsm11', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/afsm2', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/amopi', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/asl1', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/de11d', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/de4a', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/frs/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gd-d1-1', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gd-d1-2', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gd-d1-3', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gillnor1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gillnor2/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/gillrich3/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/lithon', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/net12afl/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/netc1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/netc2/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/neth2t3/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/np251002/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/nrss/ii', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/pal2', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/pao27/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/rp', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sed5a/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sedc1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sedcb1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sedct1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sedmh1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sedst1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sm53', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/sm68', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/st4n', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/st8v/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/su03', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/su4', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/tg1162', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/tg1267', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/tun1/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/wt2708/i', 'TriTrypDB'
+ UNION
+ SELECT 'perkinsiella-like_sp._plo/wtuts/i', 'TriTrypDB'
+ UNION
+ SELECT 'plo_of_paramoeba_invadens_ags-2013', 'TriTrypDB'
+ UNION
+ SELECT 'soil_flagellate_and31', 'TriTrypDB'
+ UNION
+ SELECT 'kinetoplastid_flagellate_lfs2', 'TriTrypDB'
+ UNION
+ SELECT 'cryptaulaxoides-like_sp._tcs-2003', 'TriTrypDB'
+ ORDER BY 2, 1 -- project_id, then taxon
+
+ ;
+
+ -- Strip apostrophes from taxon names.
+
+ UPDATE :ORG_ABBREVProjectTaxon
+    SET taxon = replace(taxon, '''', '')
+  WHERE taxon LIKE '%''%'
+
+ ;
+
+ -- Unique on taxon alone, so index creation fails if any taxon name appears more than once.
+
+ CREATE UNIQUE INDEX data_load_prjct_err
+     ON :ORG_ABBREVProjectTaxon (taxon)
+ ;
+
+
+ -- Unique index over both columns.
+ CREATE UNIQUE INDEX projtax_ix
+     ON :ORG_ABBREVProjectTaxon (taxon, project_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/??/SequenceAttributes.psql b/Model/lib/psql/webtables/??/SequenceAttributes.psql
new file mode 100644
index 0000000000..4cba9b09fb
--- /dev/null
+++ b/Model/lib/psql/webtables/??/SequenceAttributes.psql
@@ -0,0 +1,131 @@
+ -- SequenceAttributes: protein sequences that belong to an ortholog group, joined with their
+ -- organism and clade attributes (taxon), ortholog group attributes (o) and a source link (urls).
+ -- taxon_group is the name of the top-level clade (Archaea, Bacteria, ...) the organism descends from.
+ create table SequenceAttributes as
+ SELECT
+ aas.source_id AS full_id,
+ aas.source_id,
+ aas.aa_sequence_id,
+ length(aas.sequence) as length,
+ aas.description AS product,
+ aas.taxon_id,
+
+ taxon.orthomcl_taxon_id,
+ taxon.taxon_group,
+ taxon.orthomcl_abbrev AS taxon_abbreviation,
+ taxon.name AS organism_name,
+ taxon.core_peripheral,
+
+ o.group_id AS group_name,
+ o.ortholog_group_id,
+ o.number_of_members AS group_size,
+ o.number_of_core_members,
+ o.number_of_peripheral_members,
+ CASE is_residual WHEN 1 THEN 'Residual'
+ ELSE 'Core' END AS group_type,
+ urls.source_url,
+ urls.source_text
+ FROM
+ dots.AASequence aas,
+ apidb.orthologGroup o,
+ apidb.orthologGroupAASequence ogseq,
+ (
+ SELECT o.orthomcl_abbrev,
+ o.taxon_id as orthomcl_taxon_id,
+ t.name,
+ t.core_peripheral,
+ t.taxon_group
+ FROM apidb.organism o,
+ (WITH RECURSIVE TaxonHierarchy AS (
+ SELECT
+ three_letter_abbrev,
+ orthomcl_clade_id,
+ name,
+ core_peripheral,
+ name AS taxon_group,
+ parent_id
+ FROM apidb.OrthomclClade
+ WHERE name IN ('Archaea', 'Bacteria', 'Alveolates', 'Amoebozoa', 'Euglenozoa',
+ 'Fungi', 'Metazoa', 'Other Eukaryota', 'Viridiplantae')
+ UNION ALL
+ SELECT
+ child.three_letter_abbrev,
+ child.orthomcl_clade_id,
+ child.name,
+ child.core_peripheral,
+ parent.taxon_group,
+ child.parent_id
+ FROM apidb.OrthomclClade child
+ JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+ )
+ SELECT three_letter_abbrev, taxon_group, name, core_peripheral
+ FROM TaxonHierarchy
+ WHERE core_peripheral IN ('C', 'P')
+ ) t
+ WHERE t.three_letter_abbrev = o.orthomcl_abbrev
+ ) taxon,
+ -- urls: link and link text per protein. SUBSTR starts at 1 (PostgreSQL positions are 1-based) so the prefix keeps the slash located by strpos
+ (
+ SELECT aas.aa_sequence_id,
+ CASE
+ WHEN ores.resource_name IN ('AmoebaDB','CryptoDB','FungiDB','GiardiaDB','HostDB','MicrosporidiaDB',
+ 'PlasmoDB','PiroplasmaDB','ToxoDB','TrichDB','TriTrypDB','VectorBase')
+ THEN SUBSTR(ores.resource_url, 1, strpos(ores.resource_url, '/downloads')) || 'record/gene/'
+ || aas.source_id
+ WHEN ores.resource_name = 'Uniprot'
+ THEN SUBSTR(ores.resource_url, 1, strpos(ores.resource_url, '/proteomes') ) || 'uniprot/'
+ || aas.source_id
+ ELSE '' END AS source_url,
+ CASE WHEN ores.resource_name IS NULL THEN ''
+ ELSE aas.source_id || ' (' || ores.resource_name || ')' END AS source_text
+ FROM dots.AaSequence aas,
+ apidb.organism ot,
+ apidb.orthomclresource ores
+ WHERE ot.taxon_id = ores.orthomcl_taxon_id
+ AND ot.taxon_id = aas.taxon_id) urls
+ WHERE aas.aa_sequence_id = ogseq.aa_sequence_id
+ AND ogseq.group_id = o.group_id
+ AND aas.aa_sequence_id = urls.aa_sequence_id
+ AND taxon.orthomcl_taxon_id = aas.taxon_id
+ AND aas.taxon_id in (select distinct(eas.taxon_id) from apidb.organism og, dots.aasequence eas where eas.taxon_id = og.taxon_id)
+
+ ;
+
+ -- Primary key and unique indexes for SequenceAttributes.
+
+ ALTER TABLE SequenceAttributes
+     ADD CONSTRAINT SeqAttrs_pk PRIMARY KEY (full_id)
+
+ ;
+
+
+
+ CREATE UNIQUE INDEX SeqAttrs_idx
+     ON SequenceAttributes (full_id, group_name, taxon_id, source_id)
+ ;
+
+
+
+ CREATE UNIQUE INDEX SeqAttrs_gusIdx
+     ON SequenceAttributes (ortholog_group_id, aa_sequence_id)
+ ;
+
+
+
+ CREATE UNIQUE INDEX SeqAttrs_idx2
+     ON SequenceAttributes (group_name, length DESC, full_id, taxon_id)
+ ;
+
+
+
+ CREATE UNIQUE INDEX SeqAttrs_idx3
+     ON SequenceAttributes (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id)
+
+ ;
+
+
+
+ CREATE UNIQUE INDEX SeqAttrs_idx4
+     ON SequenceAttributes (source_id, full_id, group_name, taxon_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/??/SequenceEnzymeClass.psql b/Model/lib/psql/webtables/??/SequenceEnzymeClass.psql
new file mode 100644
index 0000000000..35fdff6793
--- /dev/null
+++ b/Model/lib/psql/webtables/??/SequenceEnzymeClass.psql
@@ -0,0 +1,36 @@
+ -- EC number assignments for the proteins listed in SequenceAttributes.
+
+ CREATE TABLE SequenceEnzymeClass AS (
+   SELECT
+     sa.full_id,
+     sa.group_name,
+     -- sec.uniprot_accession (not selected)
+     ec.ec_number,
+     ec.description,
+     ec.parent_id AS ec_parent,
+     ec.ec_number_1,
+     ec.ec_number_2,
+     ec.ec_number_3,
+     ec.ec_number_4
+   FROM sequenceattributes sa
+   INNER JOIN dots.AASequence aa ON aa.aa_sequence_id = sa.aa_sequence_id
+   INNER JOIN dots.AASequenceEnzymeClass sec ON sec.aa_sequence_id = aa.aa_sequence_id
+   INNER JOIN sres.ENZYMECLASS ec ON ec.enzyme_class_id = sec.enzyme_class_id
+ )
+
+ ;
+
+
+ -- Index led by ortholog group name.
+ CREATE INDEX SequenceEnzymeClass_idx1
+     ON SequenceEnzymeClass (group_name, ec_number, description)
+
+ ;
+
+
+ -- Index led by protein id.
+ CREATE INDEX SequenceEnzymeClass_idx2
+     ON SequenceEnzymeClass (full_id, ec_number, description)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/SequencePieceClosure.psql b/Model/lib/psql/webtables/??/SequencePieceClosure.psql
new file mode 100644
index 0000000000..3993db26d3
--- /dev/null
+++ b/Model/lib/psql/webtables/??/SequencePieceClosure.psql
@@ -0,0 +1,48 @@
+ -- Level-1 closure rows, copied from dots.SequencePiece for pieces of the requested taxon
+ -- (or of every taxon when the taxon parameter is substituted as an empty string).
+ CREATE TABLE :ORG_ABBREVSequencePieceClosure AS
+   SELECT
+     sp.sequence_piece_id,
+     sp.virtual_na_sequence_id,
+     sp.piece_na_sequence_id,
+     sp.sequence_order,
+     sp.distance_from_left,
+     sp.uncertainty,
+     sp.strand_orientation,
+     sp.start_position,
+     sp.end_position,
+     sp.modification_date,
+     1 AS edge_level
+   FROM dots.SequencePiece sp
+   INNER JOIN dots.NaSequence ns ON ns.na_sequence_id = sp.piece_na_sequence_id
+   WHERE (length(':TAXON_IDValue') = 0 OR ns.taxon_id::varchar = ':TAXON_IDValue')
+ ;
+
+ /* Adds one more level of nesting: a piece of a piece is attached directly to
+    the outer virtual sequence as an edge_level 2 row.
+    Known issue: this should be run not just once, but iteratively
+    until it doesn't create new records. Currently (7/2008),
+    SequencePieces aren't nested even once. */
+ INSERT INTO :ORG_ABBREVSequencePieceClosure
+   (edge_level, virtual_na_sequence_id, piece_na_sequence_id,
+    distance_from_left, strand_orientation, modification_date,
+    start_position, end_position, sequence_order, sequence_piece_id)
+ SELECT
+   2,
+   outer_piece.virtual_na_sequence_id,
+   inner_piece.piece_na_sequence_id,
+   outer_piece.distance_from_left,
+   CASE WHEN coalesce(outer_piece.strand_orientation, '+') = coalesce(inner_piece.strand_orientation, '+')
+        THEN '+' ELSE '-' END AS strand_orientation,
+   now(),
+   outer_piece.start_position - inner_piece.distance_from_left,
+   outer_piece.end_position - inner_piece.distance_from_left,
+   outer_piece.sequence_order,
+   nextval('dots.sequencepiece_sq')
+ FROM :ORG_ABBREVSequencePieceClosure outer_piece
+ INNER JOIN :ORG_ABBREVSequencePieceClosure inner_piece
+   ON inner_piece.virtual_na_sequence_id = outer_piece.piece_na_sequence_id
+ WHERE outer_piece.start_position >= inner_piece.start_position + inner_piece.distance_from_left
+   AND outer_piece.end_position <= inner_piece.end_position + inner_piece.distance_from_left
+ ;
+
diff --git a/Model/lib/psql/webtables/??/StudyIdDatasetId.psql b/Model/lib/psql/webtables/??/StudyIdDatasetId.psql
new file mode 100644
index 0000000000..c42d4df261
--- /dev/null
+++ b/Model/lib/psql/webtables/??/StudyIdDatasetId.psql
@@ -0,0 +1,24 @@
+ -- Maps each EDA study to the dataset presenter whose name equals the study's external database name.
+ -- NOTE(review): the CASE below names three presenters but the IN list admits only two, so the
+ -- DS_2b98dd44ab and NA branches cannot fire as written - confirm whether that is intended.
+ CREATE TABLE StudyIdDatasetId AS
+   SELECT
+     s.stable_id AS study_stable_id,
+     dp.dataset_presenter_id AS dataset_id,
+     dp.short_display_name AS dataset_short_display_name
+   FROM eda.study s
+   LEFT JOIN sres.externaldatabaserelease edr ON edr.external_database_release_id = s.external_database_release_id
+   LEFT JOIN sres.externaldatabase ed ON ed.external_database_id = edr.external_database_id
+   LEFT JOIN DatasetPresenter dp ON dp.name = ed.name
+   -- This is TEMPORARY (used for alpha MapVEU Application)
+   UNION
+   SELECT
+     CASE d.dataset_presenter_id
+       WHEN 'DS_480c976ef9' THEN 'VBP_MEGA'
+       WHEN 'DS_e18287e335' THEN '2023-maine-ricinus'
+       WHEN 'DS_2b98dd44ab' THEN '2010-Neafsey-M-S-Bamako'
+       ELSE 'NA' END AS study_stable_id,
+     d.dataset_presenter_id AS dataset_id, d.short_display_name AS dataset_short_display_name
+   FROM DatasetPresenter d WHERE d.dataset_presenter_id IN ('DS_480c976ef9', 'DS_e18287e335')
+ ;
+
diff --git a/Model/lib/psql/webtables/??/TypeAheadCounts.psql b/Model/lib/psql/webtables/??/TypeAheadCounts.psql
new file mode 100644
index 0000000000..921efa9bd7
--- /dev/null
+++ b/Model/lib/psql/webtables/??/TypeAheadCounts.psql
@@ -0,0 +1,42 @@
+-- Protein counts per type-ahead option: DbRef primary identifiers, EC numbers and GO term source ids.
+
+CREATE TABLE TypeAheadCounts AS
+  (SELECT DISTINCT
+     dr.primary_identifier AS option_id,
+     count(DISTINCT aaf.aa_sequence_id) AS protein_count
+   FROM dots.DbRefAaFeature draf
+   INNER JOIN sres.DbRef dr ON dr.db_ref_id = draf.db_ref_id
+   INNER JOIN dots.aafeature aaf ON aaf.aa_feature_id = draf.aa_feature_id
+   INNER JOIN sres.ExternalDatabaseRelease edr
+     ON edr.external_database_release_id = dr.external_database_release_id
+   INNER JOIN sres.ExternalDatabase ed ON ed.external_database_id = edr.external_database_id
+   GROUP BY dr.primary_identifier
+  )
+  UNION
+  (SELECT DISTINCT
+     ec.ec_number AS option_id,
+     count(DISTINCT asec.aa_sequence_id) AS protein_count
+   FROM sres.enzymeClass ec
+   INNER JOIN dots.aaSequenceEnzymeClass asec ON asec.enzyme_class_id = ec.enzyme_class_id
+   GROUP BY ec.ec_number
+  )
+  UNION
+  (SELECT DISTINCT
+     gt.source_id AS option_id,
+     count(DISTINCT aaf.aa_sequence_id) AS protein_count
+   FROM dots.AaFeature aaf
+   INNER JOIN dots.GoAssociation ga ON ga.row_id = aaf.aa_sequence_id
+   INNER JOIN core.TableInfo ti ON ti.table_id = ga.table_id
+   INNER JOIN sres.OntologyTerm gt ON gt.ontology_term_id = ga.go_term_id
+   INNER JOIN dots.GoAssociationInstance gai ON gai.go_association_id = ga.go_association_id
+   WHERE ti.name = 'ExternalAASequence'
+   GROUP BY gt.source_id
+  )
+
+ ;
+
+
+CREATE INDEX TypeAheadCounts_idx
+    ON TypeAheadCounts (option_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes.psql b/Model/lib/psql/webtables/MG/CompoundAttributes.psql
new file mode 100644
index 0000000000..3f69995d58
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/CompoundAttributes.psql
@@ -0,0 +1,28 @@
+ -- Top-level compounds (parent_id IS NULL). The aggregated columns combine the values of the
+ -- compound itself and of every compound that names it as parent.
+ CREATE TABLE :ORG_ABBREVCompoundAttributes AS
+   SELECT
+     p.ID,
+     p.source_id,
+     p.compound_name,
+     string_agg(src.other_names, ';' ORDER BY src.other_names) AS other_names,
+     string_agg(src.iupac_name, ';' ORDER BY src.iupac_name) AS iupac_name,
+     string_agg(src.syn, ';' ORDER BY src.syn) AS syn,
+     p.definition,
+     p.secondary_ids,
+     string_agg(src.formula, ';' ORDER BY src.formula) AS formula,
+     avg(src.mass::numeric) AS mass
+   FROM CompoundProperties p
+   INNER JOIN CompoundProperties src
+     ON (src.parent_id = p.ID OR src.ID = p.ID)
+   WHERE p.parent_id IS NULL
+   GROUP BY p.ID, p.source_id, p.compound_name, p.definition, p.secondary_ids
+
+ ;
+
+
+
+ CREATE INDEX :ORG_ABBREVCompoundAttributes_idx
+     ON :ORG_ABBREVCompoundAttributes (source_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/MG/CompoundId.psql b/Model/lib/psql/webtables/MG/CompoundId.psql
new file mode 100644
index 0000000000..64eafbd86a
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/CompoundId.psql
@@ -0,0 +1,39 @@
+ -- Every identifier or name by which a compound can be looked up.
+ -- type tells how id relates to compound: same ID, child ID, KEGG, name or synonym.
+ CREATE TABLE :ORG_ABBREVCompoundId AS
+   SELECT source_id AS id, source_id AS compound, 'same ID' AS type, '' AS source
+   FROM :ORG_ABBREVCompoundAttributes
+   UNION
+   SELECT p.source_id AS id, ca.source_id AS compound, 'child ID' AS type, '' AS source
+   FROM :ORG_ABBREVCompoundAttributes ca
+   INNER JOIN CompoundProperties p ON p.parent_id = ca.id
+   UNION
+   SELECT da.accession_number AS id, p.source_id AS compound, 'KEGG' AS type, '' AS source
+   FROM chebi.database_accession da
+   INNER JOIN :ORG_ABBREVCompoundAttributes p ON p.id = da.compound_id
+   WHERE da.type = 'KEGG COMPOUND accession'
+   UNION
+   -- KEGG accessions of child compounds, reported against a parent whose status is not D or F
+   SELECT DISTINCT da.accession_number AS id, p.chebi_accession AS compound, 'KEGG' AS type, '' AS source
+   FROM chebi.database_accession da
+   INNER JOIN chebi.compounds c ON c.id = da.compound_id
+   INNER JOIN chebi.compounds p ON p.id = c.parent_id
+   WHERE da.type = 'KEGG COMPOUND accession'
+     AND p.status NOT IN ('D', 'F')
+   UNION
+   SELECT n.name AS id, ca.source_id AS compound, 'name' AS type, n.source
+   FROM :ORG_ABBREVCompoundAttributes ca
+   INNER JOIN chebi.names n ON n.compound_id = ca.id
+   WHERE n.type = 'NAME'
+   UNION
+   SELECT n.name AS id, ca.source_id AS compound, 'synonym' AS type, n.source
+   FROM :ORG_ABBREVCompoundAttributes ca
+   INNER JOIN chebi.names n ON n.compound_id = ca.id
+   WHERE n.type = 'SYNONYM'
+
+ ;
+
+ CREATE INDEX :ORG_ABBREVCompoundId_idx
+     ON :ORG_ABBREVCompoundId (id, compound)
+ ;
+
diff --git a/Model/lib/psql/webtables/MG/CompoundProperties.psql b/Model/lib/psql/webtables/MG/CompoundProperties.psql
new file mode 100644
index 0000000000..7bea00b811
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/CompoundProperties.psql
@@ -0,0 +1,29 @@
+ -- ChEBI compounds whose status is not D or F, with names, formulas and child accessions aggregated into semicolon-separated strings.
+ -- NOTE(review): cn, formu and sec can each match several rows per compound, so the joins may multiply rows and repeat entries inside the string_agg values - confirm.
+ CREATE TABLE CompoundProperties AS
+ SELECT c.ID, c.chebi_accession AS source_id, c.parent_id,
+ c.name AS compound_name,
+ substr(string_agg(cn.name, ';'), 1, 1000) AS other_names,
+ substr(string_agg(ciup.iupac_name, ';'), 1, 1000) AS iupac_name,
+ substr(string_agg(csyn.syn, ';'), 1, 1000) AS syn,
+ c.definition, m.mass,
+ string_agg(formu.formula, ';' order by formu.formula) AS formula,
+ string_agg(sec.chebi_accession, ';' order by sec.chebi_accession) AS secondary_ids
+ FROM chebi.compounds c
+ LEFT JOIN ( SELECT compound_id, NAME FROM chebi.names WHERE TYPE='NAME')
+ cn ON c.ID = cn.compound_id
+ LEFT JOIN ( SELECT compound_id, MIN(NAME) AS iupac_name FROM chebi.names WHERE TYPE='IUPAC NAME' GROUP BY compound_id)
+ ciup ON c.ID = ciup.compound_id
+ LEFT JOIN ( SELECT compound_id, MIN(NAME) AS syn FROM chebi.names WHERE type='SYNONYM' GROUP BY compound_id)
+ csyn ON c.ID = csyn.compound_id
+ LEFT JOIN ( SELECT compound_id, chemical_data AS formula FROM chebi.chemical_data WHERE TYPE='FORMULA')
+ formu ON c.ID = formu.compound_id
+ LEFT JOIN ( SELECT compound_id, chemical_data AS mass FROM chebi.chemical_data WHERE TYPE='MASS' and chemical_data != 'NaN')
+ m ON c.ID = m.compound_id
+ LEFT JOIN ( SELECT parent_id, chebi_accession FROM chebi.compounds)
+ sec ON c.ID = sec.parent_id
+ WHERE NOT c.status in ('D', 'F')
+ GROUP BY c.ID, c.chebi_accession, c.parent_id, c.name, c.definition, m.mass
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql b/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql
new file mode 100644
index 0000000000..244b989c5c
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql
@@ -0,0 +1,15 @@
+ -- Type-ahead display strings for compounds that occur in PathwayCompounds: one labelled with the
+ -- ChEBI source_id and one labelled with the compound_source_id used by the pathway node.
+ CREATE TABLE CompoundTypeAheads AS
+   SELECT
+     ca.source_id AS compound_id,
+     ca.source_id || ' (' || ca.compound_name || ')' AS display
+   FROM CompoundAttributes ca
+   INNER JOIN PathwayCompounds pc ON pc.chebi_accession = ca.source_id
+   UNION
+   SELECT
+     ca.source_id AS compound_id,
+     pc.compound_source_id || ' (' || ca.compound_name || ')' AS display
+   FROM CompoundAttributes ca INNER JOIN PathwayCompounds pc ON pc.chebi_accession = ca.source_id
+ ;
+
diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
new file mode 100644
index 0000000000..766215dcc9
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
@@ -0,0 +1,32 @@
+-- For every ortholog group, a comma-separated summary of its most frequent domains, written as
+-- accession (number of proteins). Domains of rank 1 to 3 by protein count are kept, ties included.
+CREATE TABLE GroupDomainAttribute AS
+(
+SELECT og.group_id AS group_name, ag.descriptions
+FROM apidb.OrthologGroup og,
+     (SELECT group_name,
+             -- ORDER BY makes the summary deterministic, most frequent domain first
+             STRING_AGG(accession ||' (' || num_proteins|| ')', ', '
+                        ORDER BY num_proteins DESC, accession) AS descriptions
+      FROM (SELECT group_name, accession, num_proteins, rnk
+            FROM (SELECT group_name, accession, num_proteins,
+                         rank() OVER (PARTITION BY group_name ORDER BY num_proteins DESC) rnk
+                  FROM (SELECT group_name, accession, count(distinct full_id) AS num_proteins
+                        FROM DomainAssignment
+                        GROUP BY group_name,accession
+                       ) counted   -- subquery aliases keep the statement valid on PostgreSQL before 16
+                 ) ranked
+            WHERE rnk <= 3
+           ) top_domains
+      GROUP BY group_name
+     ) ag
+WHERE og.group_id = ag.group_name
+)
+
+ ;
+
+
+CREATE INDEX GroupDomainAttribute_idx ON GroupDomainAttribute (group_name)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webtables/MG/OntologyLevels.psql
new file mode 100644
index 0000000000..a117a4a74d
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/OntologyLevels.psql
@@ -0,0 +1,48 @@
+ -- Edges of the ontology graph whose predicate term is named is_a.
+
+ CREATE UNLOGGED TABLE :ORG_ABBREVIs_a_links AS
+   SELECT rel.subject_term_id, rel.object_term_id
+   FROM sres.OntologyRelationship rel
+   INNER JOIN sres.OntologyTerm pred ON pred.ontology_term_id = rel.predicate_term_id
+   WHERE pred.name = 'is_a'
+
+ ;
+
+ -- Root terms: terms that appear as the object of an is_a edge but never as its subject.
+
+ CREATE UNLOGGED TABLE :ORG_ABBREVRoots AS
+   SELECT object_term_id
+   FROM :ORG_ABBREVis_a_links
+   EXCEPT
+   SELECT subject_term_id FROM :ORG_ABBREVis_a_links
+ ;
+
+ -- Minimum and maximum depth of every ontology term in the is_a graph. Roots are reported at depth 0.
+ -- Other terms carry the depth computed by the recursion, which seeds roots at 1, so a direct child of a root gets depth 2.
+ CREATE TABLE :ORG_ABBREVOntologyLevels as
+ WITH RECURSIVE levels(ontology_term_id, depth) AS (
+ SELECT object_term_id, 1 as depth FROM :ORG_ABBREVRoots
+ UNION
+ SELECT :ORG_ABBREVis_a_links.subject_term_id, levels.depth + 1 as depth
+ FROM :ORG_ABBREVIs_a_links, levels
+ WHERE :ORG_ABBREVis_a_links.object_term_id = levels.ontology_term_id
+ )
+ SELECT ontology_term_id, min(depth) as min_depth, max(depth) as max_depth
+ FROM (
+ SELECT ontology_term_id, depth
+ FROM levels
+ WHERE ontology_term_id NOT IN (SELECT object_term_id FROM :ORG_ABBREVRoots)
+ UNION
+ SELECT object_term_id, 0 FROM :ORG_ABBREVRoots
+ ) t
+ GROUP BY ontology_term_id
+
+ ;
+
+
+
+ create index olev_termix on :ORG_ABBREVOntologyLevels (ontology_term_id, min_depth, max_depth)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes.psql b/Model/lib/psql/webtables/MG/PathwayAttributes.psql
new file mode 100644
index 0000000000..445fd4bffb
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/PathwayAttributes.psql
@@ -0,0 +1,68 @@
+ -- Pathways with their source database and per-pathway node counts: total_compound_count counts
+ -- nodes typed molecular entity, total_enzyme_count counts nodes typed enzyme. Because both counts
+ -- are inner-joined, a pathway is listed only when it has at least one node of each type.
+
+ CREATE TABLE PathwayAttributes AS
+   SELECT
+     p.source_id,
+     p.pathway_id,
+     p.name,
+     enz.total_enzyme_count,
+     cpd.total_compound_count,
+     p.url,
+     replace(replace(ed.name, 'Pathways_', ''), '_RSRC', '') AS pathway_source,
+     ed.name AS external_db_name,
+     edr.version AS external_db_version
+   FROM
+     sres.pathway p
+   INNER JOIN sres.externalDatabaseRelease edr
+     ON edr.external_database_release_id = p.external_database_release_id
+   INNER JOIN sres.externalDatabase ed
+     ON ed.external_database_id = edr.external_database_id
+   INNER JOIN (
+       -- molecular entity nodes per pathway
+       SELECT
+         pn.pathway_id,
+         COUNT(*) AS total_compound_count
+       FROM sres.pathwayNode pn
+       INNER JOIN sres.ontologyterm ot
+         ON ot.ontology_term_id = pn.pathway_node_type_id
+       WHERE ot.name = 'molecular entity'
+       GROUP BY pn.pathway_id
+     ) cpd
+     ON cpd.pathway_id = p.pathway_id
+   INNER JOIN (
+       -- enzyme nodes per pathway
+       SELECT
+         pn.pathway_id,
+         COUNT(*) AS total_enzyme_count
+       FROM sres.pathwayNode pn
+       INNER JOIN sres.ontologyterm ot
+         ON ot.ontology_term_id = pn.pathway_node_type_id
+       WHERE ot.name = 'enzyme'
+       GROUP BY pn.pathway_id
+     ) enz
+     ON enz.pathway_id = p.pathway_id
+   WHERE
+     p.source_id NOT IN ('ec01100', 'ec01110', 'ec01120')
+     -- temporarily remove MPMP from release 46
+     AND ed.name NOT LIKE '%MPMP%'
+
+ ;
+
+ -- Indexes on PathwayAttributes. Both name the table exactly as its CREATE TABLE does (without the
+ -- organism-abbreviation prefix), otherwise the unique index fails whenever that prefix is not empty.
+ CREATE UNIQUE INDEX PathAttr_sourceId_pwaySrc
+ ON PathwayAttributes (source_id, pathway_source)
+
+
+ ;
+
+
+
+ create index PathAttr_ix
+ on PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds.psql b/Model/lib/psql/webtables/MG/PathwayCompounds.psql
new file mode 100644
index 0000000000..1ca74c7a6c
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/PathwayCompounds.psql
@@ -0,0 +1,91 @@
+ -- Compounds (molecular entity nodes) taking part in each pathway reaction, with the ChEBI accession and link when row_id matches a ChEBI compound.
+ -- The first branch takes the node at prel.NODE_ID and labels it substrate, the second takes prel.ASSOCIATED_NODE_ID and labels it product.
+ CREATE TABLE PathwayCompounds AS
+ SELECT
+ pathway_id
+ , reaction_id
+ , ext_db_name
+ , ext_db_version
+ , compound_node_id
+ , compound_source_id
+ , c.chebi_accession
+ , case when c.chebi_accession is not null then 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=' || c.chebi_accession else null end as chebi_url
+ , type
+ FROM (
+ SELECT
+ p.PATHWAY_ID
+ , prx.PATHWAY_REACTION_ID as reaction_id
+ , ed.NAME as ext_db_name
+ , edr.version as ext_db_version
+ , pn.pathway_node_id as compound_node_id
+ , pn.DISPLAY_LABEL as compound_source_id
+ , 'substrate' as type
+ , pn.row_id
+ FROM
+ APIDB.PATHWAYREACTION prx
+ , SRES.PATHWAY p
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYNODE pn
+ , SRES.PATHWAYRELATIONSHIP prel
+ , SRES.ONTOLOGYTERM ot
+ , SRES.EXTERNALDATABASE ed
+ , SRES.EXTERNALDATABASERELEASE edr
+ WHERE p.PATHWAY_ID = prr.PATHWAY_ID
+ AND prx.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND prel.NODE_ID = pn.PATHWAY_NODE_ID
+ AND ot.NAME = 'molecular entity'
+ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
+ AND p.EXTERNAL_DATABASE_RELEASE_ID = edr.EXTERNAL_DATABASE_RELEASE_ID
+ AND edr.EXTERNAL_DATABASE_ID = ed.EXTERNAL_DATABASE_ID
+ ) t LEFT OUTER JOIN CHEBI.COMPOUNDS c on t.row_id = c.ID
+ UNION
+ SELECT
+ pathway_id
+ , reaction_id
+ , ext_db_name
+ , ext_db_version
+ , compound_node_id
+ , compound_source_id
+ , c.chebi_accession
+ , case when c.chebi_accession is not null then 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=' || c.chebi_accession else null end as chebi_url
+ , type
+ FROM (
+ SELECT
+ p.PATHWAY_ID
+ , prx.PATHWAY_REACTION_ID as reaction_id
+ , ed.NAME as ext_db_name
+ , edr.version as ext_db_version
+ , pn.pathway_node_id as compound_node_id
+ , pn.DISPLAY_LABEL as compound_source_id
+ , 'product' as type
+ , pn.row_id
+ FROM
+ APIDB.PATHWAYREACTION prx
+ , SRES.PATHWAY p
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYNODE pn
+ , SRES.PATHWAYRELATIONSHIP prel
+ , SRES.ONTOLOGYTERM ot
+ , SRES.EXTERNALDATABASE ed
+ , SRES.EXTERNALDATABASERELEASE edr
+ WHERE p.PATHWAY_ID = prr.PATHWAY_ID
+ AND prx.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND prel.ASSOCIATED_NODE_ID = pn.PATHWAY_NODE_ID
+ AND ot.NAME = 'molecular entity'
+ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
+ AND p.EXTERNAL_DATABASE_RELEASE_ID = edr.EXTERNAL_DATABASE_RELEASE_ID
+ AND edr.EXTERNAL_DATABASE_ID = ed.EXTERNAL_DATABASE_ID
+ ) t2 LEFT OUTER JOIN CHEBI.COMPOUNDS c on t2.row_id = c.ID
+
+ ;
+
+
+
+ create index PthCmpd_id_ix
+ on PathwayCompounds (pathway_id, reaction_id, ext_db_name)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MG/PathwayNodes.psql b/Model/lib/psql/webtables/MG/PathwayNodes.psql
new file mode 100644
index 0000000000..82ca920c30
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/PathwayNodes.psql
@@ -0,0 +1,306 @@
+ -- Working table: every pathway node of type metabolic process, enzyme or molecular entity, in one common column layout.
+ -- Enzyme nodes carry a gene_count from PathwayNodeGene, molecular entities carry the default 2D mol structure from ChEBI when one exists.
+ CREATE UNLOGGED TABLE NodesWithTypes AS
+ SELECT pn.pathway_id
+ , CASE WHEN pa.name IS NOT NULL THEN pa.name ELSE pn.display_label END AS display_label
+ , pa.url
+ , CASE WHEN pa.name IS NOT NULL THEN pa.name ELSE pn.display_label END AS name
+ , pa.source_id AS node_identifier
+ , pn.pathway_node_id AS pathway_node_id
+ , pn.x
+ , pn.y
+ , pn.width
+ , pn.height
+ , pn.cellular_location
+ , ot.name AS type
+ , NULL AS gene_count
+ , NULL AS default_structure
+ FROM sres.pathwaynode pn
+ INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
+ LEFT JOIN PathwayAttributes pa ON pn.display_label = pa.source_id
+ WHERE ot.name = 'metabolic process'
+ UNION ALL
+ SELECT pn.pathway_id
+ , pn.display_label
+ , NULL AS url
+ , coalesce(ec.description, pn.display_label) AS name
+ , ec.ec_number AS node_identifier
+ , pn.pathway_node_id AS pathway_node_id
+ , pn.x
+ , pn.y
+ , pn.width
+ , pn.height
+ , pn.cellular_location
+ , ot.name AS type
+ , count (tp.gene_source_id) as gene_count
+ , NULL AS default_structure
+ FROM sres.pathwaynode pn
+ INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
+ LEFT JOIN sres.enzymeclass ec ON pn.row_id = ec.enzyme_class_id
+ LEFT JOIN PathwayNodeGene tp ON pn.pathway_node_id = tp.pathway_node_id
+ WHERE ot.name = 'enzyme'
+ GROUP BY pn.pathway_id
+ , pn.display_label
+ , ec.description
+ , ec.ec_number
+ , pn.pathway_node_id
+ , pn.x
+ , pn.y
+ , pn.width
+ , pn.height
+ , pn.cellular_location
+ , ot.name
+ UNION ALL
+ SELECT pn.pathway_id
+ , pn.display_label
+ , NULL AS url
+ , coalesce(c.name, pn.display_label) AS name
+ , c.chebi_accession AS node_identifier
+ , pn.pathway_node_id AS pathway_node_id
+ , pn.x
+ , pn.y
+ , pn.width
+ , pn.height
+ , pn.cellular_location
+ , ot.name AS type
+ , NULL AS gene_count
+ , st.default_structure
+ FROM sres.pathwaynode pn
+ INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
+ LEFT JOIN chebi.compounds c ON pn.row_id = c.id
+ LEFT JOIN (
+ SELECT n.compound
+ , s.structure AS default_structure
+ FROM chebi.structures s
+ , (SELECT id
+ , compound
+ FROM CompoundId
+ WHERE type IN ('same ID', 'child ID')
+ ) n
+ WHERE n.id = 'CHEBI:' || s.compound_id
+ AND s.type = 'mol'
+ AND s.dimension = '2D'
+ AND s.default_structure = 'Y'
+ ) st ON c.chebi_accession = st.compound
+ WHERE ot.name = 'molecular entity'
+
+ ;
+
+ -- Working table: reversibility flag and reaction source id for each pathway relationship,
+ -- taken from PathwayReactions through apidb.pathwayreactionrel.
+ CREATE UNLOGGED TABLE ReactionsWithReversibility AS
+   SELECT DISTINCT
+     spr.pathway_relationship_id,
+     tpr.is_reversible,
+     tpr.reaction_source_id
+   FROM sres.pathwayrelationship spr
+   INNER JOIN apidb.pathwayreactionrel prr
+     ON prr.pathway_relationship_id = spr.pathway_relationship_id
+   INNER JOIN PathwayReactions tpr ON tpr.reaction_id = prr.pathway_reaction_id
+
+ ;
+
+ -- Working table: for each enzyme node, pairs of relationships in which the enzyme is node_id of one (i) and associated_node_id of the other (o),
+ -- restricted to pairs that share reaction_source_id and is_reversible. io is a text key built from both endpoints and both flags.
+ CREATE UNLOGGED TABLE EnzymeEdges AS
+ SELECT DISTINCT nwt.pathway_id AS pathway_id
+ , nwt.pathway_node_id AS e_id
+ , nwt.type
+ , i.associated_node_id AS m1_id
+ , rri.is_reversible AS ir1
+ , o.node_id AS m2_id
+ , rro.is_reversible AS ir2
+ , i.associated_node_id || '_' || o.node_id || '_' || rri.is_reversible || '_' || rro.is_reversible as io
+ FROM sres.pathwayrelationship i
+ , sres.pathwayrelationship o
+ , NodesWithTypes nwt
+ , ReactionsWithReversibility rri
+ , ReactionsWithReversibility rro
+ WHERE i.node_id = nwt.pathway_node_id
+ AND o.associated_node_id = nwt.pathway_node_id
+ AND i.pathway_relationship_id = rri.pathway_relationship_id
+ AND o.pathway_relationship_id = rro.pathway_relationship_id
+ AND nwt.type = 'enzyme'
+ AND rri.is_reversible = rro.is_reversible
+ AND rri.reaction_source_id = rro.reaction_source_id
+
+ ;
+
+ -- Working table: enzyme nodes of one pathway that have exactly the same set of edges (all_edges) are grouped,
+ -- and each group of two or more gets a parent id made of the member node ids joined with underscores.
+ CREATE UNLOGGED TABLE ParentNodes AS
+ WITH AllEnzymeEdges AS (
+ SELECT string_agg(io, ',' ORDER BY io) AS all_edges
+ , e_id
+ , pathway_id
+ FROM EnzymeEdges
+ GROUP BY pathway_id
+ , e_id
+ )
+ , pn as (
+ SELECT pathway_id
+ , all_edges
+ , string_agg(e_id::varchar, '_' ORDER BY e_id) AS parent
+ FROM AllEnzymeEdges
+ GROUP BY pathway_id
+ , all_edges
+ HAVING COUNT (*) > 1
+ )
+ SELECT aee.e_id, pn.*
+ FROM pn
+ , AllEnzymeEdges aee
+ WHERE aee.all_edges = pn.all_edges -- NOTE(review): matches on all_edges only, not pathway_id - confirm node ids never repeat across pathways
+
+ ;
+
+ -- Working table: each grouped enzyme node with the parent id assigned in ParentNodes.
+
+ CREATE UNLOGGED TABLE NodesWithParents AS
+   SELECT DISTINCT
+     ee.e_id AS pathway_node_id,
+     pn.parent,
+     ee.type AS node_type,
+     ee.pathway_id
+   FROM EnzymeEdges ee
+   INNER JOIN ParentNodes pn
+     ON pn.pathway_id = ee.pathway_id AND pn.e_id = ee.e_id
+
+ ;
+
+ -- Working table: the reaction source ids that each enzyme node takes part in, on either
+ -- end of a pathway relationship.
+ CREATE UNLOGGED TABLE EnzymeReactions AS
+   SELECT DISTINCT
+     pn.pathway_node_id AS node_id,
+     pr.source_id AS reaction_source_id
+   FROM sres.pathwaynode pn
+   INNER JOIN sres.ontologyterm ot
+     ON ot.ontology_term_id = pn.pathway_node_type_id
+   INNER JOIN sres.pathwayrelationship prel
+     ON (prel.node_id = pn.pathway_node_id OR prel.associated_node_id = pn.pathway_node_id)
+   INNER JOIN apidb.pathwayreactionrel prr
+     ON prr.pathway_relationship_id = prel.pathway_relationship_id
+   INNER JOIN apidb.pathwayreaction pr ON pr.pathway_reaction_id = prr.pathway_reaction_id
+   WHERE ot.name = 'enzyme'
+
+ ;
+
+ -- Working table: every enzyme edge of a grouped enzyme, tagged with the parent id of its group.
+
+ CREATE UNLOGGED TABLE ParentsForEdges AS
+   SELECT
+     ee.e_id,
+     ee.m1_id,
+     ee.ir1,
+     ee.m2_id,
+     ee.ir2,
+     np.parent
+   FROM EnzymeEdges ee
+   INNER JOIN NodesWithParents np ON np.pathway_node_id = ee.e_id
+
+ ;
+
+ -- Edges of each pathway graph: enzyme (or its parent group) to m1, m2 to enzyme (or its parent group), and relationships between two non-enzyme nodes.
+ -- Each edge is joined to PathwayAttributes, so pathways missing from that table contribute no edges.
+ CREATE TABLE PathwayEdges AS
+ SELECT pa.source_id
+ , pa.pathway_source
+ , rel.*
+ FROM (
+ SELECT DISTINCT ee.pathway_id
+ , coalesce(pe.parent, ee.e_id::varchar) AS source
+ , ee.m1_id::varchar AS target
+ , max(ee.ir1) AS is_reversible
+ FROM EnzymeEdges ee
+ LEFT JOIN ParentsForEdges pe ON ee.e_id = pe.e_id
+ GROUP BY ee.pathway_id, ee.m1_id, coalesce(pe.parent, ee.e_id::varchar)
+ UNION
+ SELECT DISTINCT ee.pathway_id
+ , ee.m2_id::varchar AS source
+ , coalesce(pe.parent, ee.e_id::varchar) AS target
+ , max(ee.ir2) AS is_reversible
+ FROM EnzymeEdges ee
+ LEFT JOIN ParentsForEdges pe ON ee.e_id = pe.e_id
+ GROUP BY ee.pathway_id, ee.m2_id, coalesce(pe.parent, ee.e_id::varchar)
+ UNION
+ SELECT pn1.pathway_id
+ , pr.node_id::varchar AS source
+ , pr.associated_node_id::varchar AS target
+ , pr.is_reversible
+ FROM sres.pathwayrelationship pr
+ , sres.pathwaynode pn1
+ , sres.pathwaynode pn2
+ , sres.ontologyterm ot1
+ , sres.ontologyterm ot2
+ WHERE pr.node_id = pn1.pathway_node_id
+ AND pr.associated_node_id = pn2.pathway_node_id
+ AND pn1.pathway_node_type_id = ot1.ontology_term_id
+ AND pn2.pathway_node_type_id = ot2.ontology_term_id
+ AND ot1.name != 'enzyme'
+ AND ot2.name != 'enzyme'
+ ) rel
+ , PathwayAttributes pa
+ WHERE pa.pathway_id = rel.pathway_id
+
+ ;
+
+ -- Nodes of each pathway graph: enzyme nodes (one row per reaction they take part in), nodeOfNodes rows for enzyme groups, and all non-enzyme nodes.
+ -- Enzyme nodes with no url of their own get a reaction link built from the pathway source. All Cyc-style links use the type=REACTION query parameter.
+ CREATE TABLE PathwayNodes AS
+ SELECT pa.source_id
+ , pa.pathway_source
+ , pn.display_label
+ , pn.x
+ , pn.y
+ , pn.width
+ , pn.height
+ , pn.cellular_location
+ , coalesce(pn.url,
+ CASE WHEN coalesce(type, nodes_with_parents.node_type) = 'enzyme' THEN
+ CASE
+ -- CHECK AND FIX
+ --WHEN REGEXP_LIKE (display_label, '^\d+\.(\d|-)+\.(\d|-)+\.(\d|-)+$') THEN 'https://enzyme.expasy.org/EC/' || display_label
+ WHEN pa.pathway_source = 'KEGG' THEN 'https://www.genome.jp/dbget-bin/www_bget?rn:' || reaction_source_id
+ WHEN pa.pathway_source = 'MetaCyc' THEN 'https://metacyc.org/META/new-image?type=REACTION' || chr(38) || 'object=' || reaction_source_id
+ WHEN pa.pathway_source = 'TrypanoCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/TRYPANO/new-image?type=REACTION' || chr(38) || 'object=' || reaction_source_id
+ WHEN pa.pathway_source = 'LeishCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/LEISH/new-image?type=REACTION' || chr(38) || 'object=' || reaction_source_id
+ ELSE NULL END
+ ELSE NULL END) AS url
+ , pn.name
+ , pn.node_identifier
+ , nodes_with_parents.pathway_node_id AS id
+ , nodes_with_parents.parent
+ , reaction_source_id
+ , coalesce(type, nodes_with_parents.node_type) AS node_type
+ , pn.gene_count
+ , pn.default_structure
+ FROM
+ ( SELECT e_id::varchar AS pathway_node_id
+ , nwp.parent
+ , type AS node_type
+ , ee.pathway_id
+ , er.reaction_source_id
+ FROM EnzymeEdges ee
+ INNER JOIN EnzymeReactions er ON er.node_id = ee.e_id
+ LEFT JOIN NodesWithParents nwp ON ee.e_id = nwp.pathway_node_id
+ UNION
+ SELECT nwp.parent
+ , NULL
+ , 'nodeOfNodes'
+ , pathway_id
+ , NULL
+ FROM NodesWithParents nwp
+ UNION
+ SELECT nwt.pathway_node_id::varchar AS pathway_node_id
+ , NULL AS parent
+ , nwt.type
+ , nwt.pathway_id
+ , NULL
+ FROM NodesWithTypes nwt
+ WHERE nwt.type != 'enzyme'
+ ) nodes_with_parents
+ INNER JOIN PathwayAttributes pa ON nodes_with_parents.pathway_id = pa.pathway_id
+ LEFT JOIN NodesWithTypes pn ON nodes_with_parents.pathway_node_id = pn.pathway_node_id::varchar
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MG/PathwayReactions.psql b/Model/lib/psql/webtables/MG/PathwayReactions.psql
new file mode 100644
index 0000000000..29f1ecef7c
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/PathwayReactions.psql
@@ -0,0 +1,141 @@
+ -- PathwayReactions - per reaction, enzyme and source database, builds the text and HTML equation,
+ -- a reversibility flag and an enzyme link (ExPASy search URL, else the source reaction URL).
+ CREATE TABLE PathwayReactions AS
+ SELECT o.*
+ , CASE WHEN o.expasy_url IS NOT NULL THEN '' || o.enzyme || '' ELSE o.enzyme END as expasy_html
+ FROM (
+ SELECT i.*
+ , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-'
+ THEN
+ 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1='
+ || ec.ec_number_1
+ || CASE WHEN ec.ec_number_2 IS NULL THEN '' ELSE chr(38) || 'field2=' || ec.ec_number_2 END -- empty string, not NULL, so a missing level does not null the whole URL
+ || CASE WHEN ec.ec_number_3 IS NULL THEN '' ELSE chr(38) || 'field3=' || ec.ec_number_3 END
+ || CASE WHEN ec.ec_number_4 IS NULL THEN '' ELSE chr(38) || 'field4=' || ec.ec_number_4 END
+ ELSE reaction_url END as expasy_url
+ , ec.description as enzyme_description
+ FROM (
+ SELECT
+ reaction_id
+ , reaction_source_id
+ , reaction_url
+ , ext_db_name
+ , ext_db_version
+ , enzyme
+ , substrates_html || ' ' || sign || ' ' || products_html as equation_html
+ , substrates_text || ' ' || sign || ' ' || products_text as equation_text
+ , case when sign = '<=>' then 1 else 0 end as is_reversible
+ , substrates_text
+ , products_text
+ FROM (
+ SELECT
+ reaction_id
+ , reaction_source_id
+ , reaction_url
+ , ext_db_name
+ , ext_db_version
+ , enzyme
+ , (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like '%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign
+ , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html
+ , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text
+ , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html
+ , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text
+ FROM (
+ WITH rep AS ( -- per reaction, enzyme and compound - one representative compound type and the highest is_reversible value of the reaction
+ SELECT DISTINCT
+ pr.PATHWAY_REACTION_ID as reaction_id
+ , pr.SOURCE_ID as reaction_source_id
+ , pn.DISPLAY_LABEL as enzyme
+ , coalesce(ca.compound_name, pc.compound_source_id) as compound
+ , prel.is_reversible as is_reversible_og
+ , last_value(prel.is_reversible) OVER (partition by pr.pathway_reaction_id ORDER BY prel.is_reversible ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) as is_reversible
+ , first_value(pc.type) over (partition by pr.pathway_reaction_id, pr.SOURCE_ID, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE, coalesce(pc.chebi_accession, pc.compound_source_id), coalesce(ca.compound_name, pc.compound_source_id) ORDER BY pc.pathway_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as type
+ FROM
+ sres.pathway p
+ , apidb.pathwayreaction pr
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYNODE pn
+ , SRES.PATHWAYRELATIONSHIP prel
+ , SRES.ONTOLOGYTERM ot
+ , PathwayCompounds pc
+ LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id
+ WHERE p.PATHWAY_ID = prr.PATHWAY_ID
+ AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND prel.NODE_ID = pn.PATHWAY_NODE_ID
+ AND ot.name = 'enzyme'
+ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
+ AND pc.PATHWAY_ID = p.PATHWAY_ID
+ AND pc.REACTION_id = pr.PATHWAY_REACTION_ID
+ )
+ SELECT DISTINCT
+ pr.PATHWAY_REACTION_ID as reaction_id
+ , pr.SOURCE_ID as reaction_source_id
+ , ed.NAME as ext_db_name
+ , edr.VERSION as ext_db_version
+ , cast(pn.DISPLAY_LABEL as varchar(20)) as enzyme
+ , min(rep.is_reversible) as is_reversible
+ , min(rep.type) as type
+ , string_agg (pc.type, ',' order by p.pathway_id) as type_list
+ , coalesce(ca.compound_name, pc.compound_source_id) as compound
+ , CASE
+ WHEN coalesce(pc.CHEBI_ACCESSION, pc.compound_source_id) LIKE 'CHEBI%'
+ THEN '' || coalesce(ca.compound_name, pc.compound_source_id) || ''
+ ELSE coalesce(pc.chebi_accession, pc.compound_source_id)
+ END as compound_url
+ , CASE (replace (replace (ed.name, 'Pathways_', ''), '_RSRC', ''))
+ WHEN 'KEGG' THEN 'https://www.genome.jp/dbget-bin/www_bget?rn:' || pr.source_id
+ WHEN 'MetaCyc' THEN 'https://metacyc.org/META/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id
+ WHEN 'TrypanoCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/TRYPANO/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id
+ WHEN 'LeishCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/LEISH/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id -- query parameter is type, as in the TrypanoCyc URL
+ WHEN 'FungiCyc' THEN NULL
+ END as reaction_url
+ FROM
+ sres.pathway p
+ , apidb.pathwayreaction pr
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYNODE pn
+ , SRES.PATHWAYRELATIONSHIP prel
+ , SRES.EXTERNALDATABASE ed
+ , SRES.EXTERNALDATABASERELEASE edr
+ , SRES.ONTOLOGYTERM ot
+ , rep
+ , PathwayCompounds pc
+ LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id
+ WHERE p.PATHWAY_ID = prr.PATHWAY_ID
+ AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND prel.NODE_ID = pn.PATHWAY_NODE_ID
+ AND ot.name = 'enzyme'
+ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
+ AND pc.EXT_DB_NAME = ed.NAME
+ AND pc.EXT_DB_VERSION = edr.VERSION
+ AND ed.EXTERNAL_DATABASE_ID = edr.EXTERNAL_DATABASE_ID
+ AND pc.PATHWAY_ID = p.PATHWAY_ID
+ AND pc.REACTION_id = pr.PATHWAY_REACTION_ID
+ AND rep.reaction_id = pr.pathway_reaction_id
+ AND rep.reaction_source_id = pr.source_id
+ AND rep.compound = coalesce(ca.compound_name, pc.compound_source_id)
+ AND rep.enzyme = pn.DISPLAY_LABEL
+ AND rep.is_reversible_og = prel.is_reversible
+ GROUP BY pr.pathway_reaction_id, pr.SOURCE_ID, ed.NAME, edr.VERSION, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE
+ , coalesce(pc.chebi_accession, pc.compound_source_id)
+ , coalesce(ca.compound_name, pc.compound_source_id)
+ ) t1
+ GROUP BY reaction_id, reaction_source_id, reaction_url, ext_db_name, ext_db_version, enzyme, is_reversible
+ ) t2
+ ) i
+ LEFT OUTER JOIN sres.enzymeclass ec ON i.enzyme = ec.ec_number
+ ) o
+
+ ;
+
+
+
+ -- Index over the reaction ids, enzyme, link URL and source database name.
+ CREATE INDEX PathRcts_id_ix
+     ON PathwayReactions
+     (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql
new file mode 100644
index 0000000000..fe64a5fecf
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql
@@ -0,0 +1,44 @@
+:CREATE_AND_POPULATE
+ -- ChIPchipTranscript - pairs each transcript with segment results on the same sequence whose
+ -- midpoint is within 3000 of start_min (is_reversed = 0) or end_max (is_reversed = 1).
+ CREATE TABLE ChIPchipTranscript AS
+ SELECT DISTINCT ta.source_id, ta.gene_source_id, ta.project_id, sr.protocol_app_node_id,
+ CASE
+ WHEN ta.is_reversed = 0
+ THEN round(abs(ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)),0)
+ ELSE round(abs(ta.end_max - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)),0)
+ END as distance, -- absolute offset between the anchor coordinate and the segment midpoint, rounded
+ CASE
+ WHEN /* distance > 0 */
+ CASE WHEN ta.is_reversed = 0
+ THEN ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)
+ ELSE ta.end_max - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)
+ END > 0
+ THEN
+ CASE
+ WHEN ta.is_reversed = 0
+ THEN '-'
+ ELSE '+'
+ END
+ ELSE
+ CASE
+ WHEN ta.is_reversed = 1
+ THEN '-'
+ ELSE '+'
+ END
+ END as direction, -- minus when the signed offset is positive and is_reversed = 0, or not positive and is_reversed = 1
+ sr.score1 as score
+ FROM TranscriptAttributes ta,
+ Results.segmentresult sr,
+ Study.StudyLink sl,
+ Study.Study s
+ WHERE sr.na_sequence_id = ta.na_sequence_id
+ AND s.study_id = sl.study_id
+ AND sl.protocol_app_node_id = sr.protocol_app_node_id
+ AND lower(s.name) like '%chip%peaks' -- only studies whose lower-cased name matches this pattern
+ AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.start_min) <= 3000)
+ or (ta.is_reversed = 1 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.end_max) <= 3000) )
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
new file mode 100644
index 0000000000..44e43c5b10
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
@@ -0,0 +1,7 @@
+
+
+ -- Index on protocol_app_node_id followed by the transcript and gene source ids.
+ CREATE INDEX chpgene_geneid_idx
+     ON ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
new file mode 100644
index 0000000000..d22fda9dd9
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
@@ -0,0 +1,19 @@
+:CREATE_AND_POPULATE
+
+ -- Distinct chromosome copy number rows for sequences that have a chromosome, with PANIo node ids.
+ CREATE TABLE ChrCopyNumbers AS
+ SELECT DISTINCT
+     ta.na_sequence_id,
+     ta.chromosome,
+     ccn.chr_copy_number AS ploidy,
+     io.input_pan_id,
+     io.output_pan_id
+ FROM apidb.ChrCopyNumber ccn
+ INNER JOIN TranscriptAttributes ta
+     ON ta.na_sequence_id = ccn.na_sequence_id
+ INNER JOIN PANIo io
+     ON io.output_pan_id = ccn.protocol_app_node_id
+ WHERE ta.chromosome IS NOT NULL
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
new file mode 100644
index 0000000000..ab77efc977
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
@@ -0,0 +1,16 @@
+
+
+ -- Index on (input_pan_id, na_sequence_id).
+ CREATE INDEX ChrCN_ix ON ChrCopyNumbers (
+     input_pan_id, na_sequence_id
+ )
+ ;
+
+
+
+ -- Single-column index on output_pan_id.
+ CREATE INDEX ChrCN_output ON ChrCopyNumbers (
+     output_pan_id
+ )
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/CodingSequence.psql b/Model/lib/psql/webtables/MO/CodingSequence.psql
new file mode 100644
index 0000000000..e1560b855f
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/CodingSequence.psql
@@ -0,0 +1,14 @@
+:CREATE_AND_POPULATE
+
+ -- Slice of each spliced transcript sequence from translation_start through translation_stop.
+ CREATE TABLE CodingSequence AS
+ SELECT ta.source_id, ta.project_id,
+     SUBSTR(sns.sequence,
+         tf.translation_start::INTEGER,
+         tf.translation_stop::INTEGER - tf.translation_start::INTEGER + 1) AS sequence
+ FROM TranscriptAttributes ta
+ INNER JOIN dots.SplicedNaSequence sns ON sns.source_id = ta.source_id
+ INNER JOIN dots.TranslatedAaFeature tf ON tf.na_feature_id = ta.na_feature_id
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
new file mode 100644
index 0000000000..26d8f1c327
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
@@ -0,0 +1,7 @@
+
+
+ -- Index on (source_id, project_id).
+ CREATE INDEX CodSeq_ix
+     ON CodingSequence (source_id, project_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql
new file mode 100644
index 0000000000..9d7b830c6d
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql
@@ -0,0 +1,23 @@
+:CREATE_AND_POPULATE
+
+ -- Keeps one example profile row per dataset name - the first by chromosome order, then profile string descending.
+ CREATE TABLE DatasetExampleSourceId AS
+ WITH ranked AS (
+     SELECT
+         prof.source_id,
+         ga.project_id,
+         ga.sequence_id,
+         db.name,
+         ROW_NUMBER() OVER (PARTITION BY db.name
+             ORDER BY ga.chromosome_order_num, prof.profile_as_string DESC) AS pick
+     FROM Profile prof
+     INNER JOIN sres.ExternalDatabase db ON prof.dataset_name = db.name
+     LEFT JOIN GeneAttributes ga ON prof.source_id = ga.source_id
+     WHERE prof.profile_as_string IS NOT NULL
+ )
+ SELECT r.source_id AS example_source_id, r.project_id, r.sequence_id, r.name AS dataset
+ FROM ranked r
+ WHERE r.pick = 1
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId_ix.psql b/Model/lib/psql/webtables/MO/DatasetExampleSourceId_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/EqtlSpan.psql b/Model/lib/psql/webtables/MO/EqtlSpan.psql
new file mode 100644
index 0000000000..10215fca8e
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/EqtlSpan.psql
@@ -0,0 +1,25 @@
+:CREATE_AND_POPULATE
+
+ -- Highest LOD score (only scores of at least 1.5) per gene, haplotype block and block location.
+ CREATE TABLE eqtlSpan AS
+ SELECT hits.gene_source_id, hits.project_id,
+     hits.haplotype_block_name AS hapblock_id, hits.sequence_id,
+     hits.start_min, hits.end_max, hits.start_max, hits.end_min,
+     MAX(hits.score) AS lod_score, hits.organism
+ FROM (SELECT ga.gene_source_id, ga.project_id, gls.haplotype_block_name,
+         ens.source_id AS sequence_id, nl.start_min, nl.end_max, nl.start_max, nl.end_min,
+         gls.lod_score_mant * power(10::double precision, gls.lod_score_exp) AS score,
+         replace (ga.organism, ' ', '+') AS organism
+     FROM apidb.NAFeatureHaploblock gls
+     INNER JOIN TranscriptAttributes ga ON gls.na_feature_id = ga.gene_na_feature_id
+     INNER JOIN dots.ChromosomeElementFeature cef ON cef.name = gls.haplotype_block_name
+     INNER JOIN dots.NaLocation nl ON nl.na_feature_id = cef.na_feature_id
+     INNER JOIN dots.ExternalNaSequence ens ON cef.na_sequence_id = ens.na_sequence_id
+     WHERE (gls.lod_score_mant * power(10::double precision, gls.lod_score_exp)) >= 1.5
+     ) hits
+ GROUP BY hits.gene_source_id, hits.project_id, hits.sequence_id, hits.haplotype_block_name,
+     hits.start_min, hits.end_max, hits.start_max, hits.end_min, hits.organism
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
new file mode 100644
index 0000000000..ef659c7e2a
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
@@ -0,0 +1,8 @@
+
+
+ -- Index spanning every column of eqtlSpan, led by gene_source_id.
+ CREATE INDEX eqtlSpan_ix
+     ON eqtlSpan
+     (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
new file mode 100644
index 0000000000..1985445f0a
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
@@ -0,0 +1,68 @@
+ -- EstAlignmentGene (unlogged staging table) - BLAT alignments of ESTs joined to transcripts on the
+ -- same target sequence whose gene span overlaps the alignment, plus the size of that overlap.
+ CREATE UNLOGGED TABLE EstAlignmentGene AS
+ SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession,
+ e.library_id, ba.query_taxon_id, ba.target_na_sequence_id,
+ ba.target_taxon_id, ba.percent_identity, ba.is_consistent,
+ ba.is_best_alignment, ba.is_reversed, ba.target_start, ba.target_end,
+ ga.sequence_id AS target_sequence_source_id,
+ least(ba.target_end, ga.gene_end_max)
+ - greatest(ba.target_start, ga.gene_start_min) + 1
+ AS est_gene_overlap_length,
+ ba.query_bases_aligned / (query_sequence.length)
+ * 100 AS percent_est_bases_aligned, -- NOTE(review) - truncates if both operands are integer typed, confirm column types
+ ga.gene_source_id AS gene
+ FROM dots.BlatAlignment ba, dots.Est e,
+ apidbtuning.TranscriptAttributes ga,
+ dots.NaSequence query_sequence
+ WHERE e.na_sequence_id = ba.query_na_sequence_id
+ AND ga.na_sequence_id = ba.target_na_sequence_id
+ AND least(ba.target_end, ga.gene_end_max) - greatest(ba.target_start, ga.gene_start_min) >= 0 -- alignment and gene span share at least one position
+ AND query_sequence.na_sequence_id = ba.query_na_sequence_id
+
+ ;
+
+ -- EstAlignmentNoGene (unlogged staging table) - EST alignments that have no row in EstAlignmentGene,
+ -- stored with NULL est_gene_overlap_length and NULL gene. Reads EstAlignmentGene, so that table must exist first.
+ CREATE UNLOGGED TABLE EstAlignmentNoGene AS
+ SELECT * from EstAlignmentGene WHERE 1=0 UNION /* define datatype for null column */
+ SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession,
+ e.library_id, ba.query_taxon_id, ba.target_na_sequence_id,
+ ba.target_taxon_id, ba.percent_identity, ba.is_consistent,
+ ba.is_best_alignment, ba.is_reversed, ba.target_start, ba.target_end,
+ sequence.source_id AS target_sequence_source_id,
+ NULL AS est_gene_overlap_length,
+ ba.query_bases_aligned / (query_sequence.length)
+ * 100 AS percent_est_bases_aligned,
+ NULL AS gene
+ FROM dots.BlatAlignment ba, dots.Est e, dots.AssemblySequence aseq,
+ dots.NaSequence sequence, dots.NaSequence query_sequence
+ WHERE e.na_sequence_id = ba.query_na_sequence_id
+ AND e.na_sequence_id = query_sequence.na_sequence_id
+ AND aseq.na_sequence_id = ba.query_na_sequence_id
+ AND ba.target_na_sequence_id = sequence.na_sequence_id
+ AND ba.blat_alignment_id IN
+ ( /* set of blat_alignment_ids not in first leg of UNION */
+ /* (because they overlap no genes) */
+ SELECT ba.blat_alignment_id
+ FROM dots.BlatAlignment ba, dots.NaSequence query_sequence,
+ sres.OntologyTerm so
+ WHERE query_sequence.na_sequence_id = ba.query_na_sequence_id
+ AND query_sequence.sequence_ontology_id = so.ontology_term_id
+ AND so.name = 'EST'
+ EXCEPT
+ SELECT blat_alignment_id FROM EstAlignmentGene)
+
+ ;
+
+:CREATE_AND_POPULATE
+ -- EstAlignmentGeneSummary - union of the two staging tables built earlier in this file.
+ -- Both sides use SELECT *, so the two tables must keep the same column order.
+ CREATE TABLE EstAlignmentGeneSummary AS
+ SELECT * FROM EstAlignmentNoGene
+ UNION
+ SELECT * FROM EstAlignmentGene
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql
new file mode 100644
index 0000000000..6dec9178b5
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql
@@ -0,0 +1,20 @@
+
+
+ -- Index led by library_id, followed by the identity, consistency, overlap and aligned-percent columns.
+ CREATE INDEX EstSumm_libOverlap_ix ON EstAlignmentGeneSummary (
+     library_id, percent_identity, is_consistent,
+     est_gene_overlap_length, percent_est_bases_aligned
+ )
+
+ ;
+
+
+
+ -- Index on the target sequence id and target coordinates, then library_id.
+ CREATE INDEX EstSumm_estSite_ix ON EstAlignmentGeneSummary (
+     target_sequence_source_id, target_start, target_end,
+     library_id
+ )
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/EstAttributes.psql b/Model/lib/psql/webtables/MO/EstAttributes.psql
new file mode 100644
index 0000000000..17da985e7f
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/EstAttributes.psql
@@ -0,0 +1,51 @@
+:CREATE_AND_POPULATE
+ -- EstAttributes - one row per EST-typed external NA sequence with base counts, library details,
+ -- organism, source database and the largest per-release count of best BLAT alignments (0 if none).
+ CREATE TABLE EstAttributes AS
+ SELECT
+ cast(apidb.project_id(tn.name) as varchar(20)) as project_id,
+ ens.source_id,
+ e.seq_primer AS primer,
+ ens.a_count,
+ ens.c_count,
+ ens.g_count,
+ ens.t_count,
+ (ens.length - (ens.a_count + ens.c_count + ens.g_count + ens.t_count))
+ AS other_count,
+ ens.length,
+ replace(l.dbest_name, '''', '-') as dbest_name,
+ coalesce(nullif(regexp_replace(l.vector, '^\s+$', ''), ''), 'unknown') AS vector, -- blank or NULL becomes unknown (a NULL replacement argument makes regexp_replace return NULL for every row)
+ coalesce(nullif(regexp_replace(l.stage, '^\s+$', ''), ''), 'unknown') AS stage,
+ SUBSTR(CASE
+ WHEN tn.name = 'Giardia lamblia' THEN 'Giardia Assemblage A isolate WB'
+ ELSE tn.name
+ END, 1, 100) AS organism,
+ taxon.ncbi_tax_id,
+ ed.name AS external_db_name,
+ coalesce(best.best_alignment_count, 0) AS best_alignment_count,
+ l.library_id, replace(l.dbest_name, '''', '-') as library_dbest_name
+ FROM dots.Est e, dots.Library l, sres.Taxon, sres.OntologyTerm oterm,
+ sres.TaxonName tn, sres.ExternalDatabase ed,
+ sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens
+ LEFT JOIN
+ (select query_na_sequence_id,max(ct) as best_alignment_count
+ from (
+ SELECT query_na_sequence_id, COUNT(*) AS ct
+ FROM dots.BlatAlignment ba
+ WHERE is_best_alignment = 1
+ GROUP BY target_external_db_release_id,query_na_sequence_id) t
+ group by query_na_sequence_id
+ ) best ON ens.na_sequence_id = best.query_na_sequence_id
+ WHERE e.na_sequence_id = ens.na_sequence_id
+ AND e.library_id = l.library_id
+ AND ens.taxon_id = tn.taxon_id
+ AND ens.taxon_id = taxon.taxon_id
+ AND tn.name_class='scientific name'
+ AND ens.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND ens.sequence_ontology_id = oterm.ontology_term_id
+ AND oterm.name = 'EST'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/EstAttributes_ix.psql b/Model/lib/psql/webtables/MO/EstAttributes_ix.psql
new file mode 100644
index 0000000000..3708681405
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/EstAttributes_ix.psql
@@ -0,0 +1,7 @@
+
+
+ -- Enforces one EstAttributes row per source_id.
+ CREATE UNIQUE INDEX EstAttr_source_id
+     ON EstAttributes (source_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/EstSequence.psql b/Model/lib/psql/webtables/MO/EstSequence.psql
new file mode 100644
index 0000000000..9dc3effb71
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/EstSequence.psql
@@ -0,0 +1,16 @@
+:CREATE_AND_POPULATE
+
+ -- Sequence text of every EST-typed external NA sequence, with its source id and project id.
+ CREATE TABLE EstSequence AS
+ SELECT
+     ens.source_id,
+     CAST(apidb.project_id(tn.name) AS varchar(20)) AS project_id,
+     ens.sequence
+ FROM dots.ExternalNaSequence ens
+ INNER JOIN sres.OntologyTerm oterm ON oterm.ontology_term_id = ens.sequence_ontology_id
+ INNER JOIN sres.TaxonName tn ON tn.taxon_id = ens.taxon_id
+ WHERE oterm.name = 'EST'
+   AND tn.name_class = 'scientific name'
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/EstSequence_ix.psql b/Model/lib/psql/webtables/MO/EstSequence_ix.psql
new file mode 100644
index 0000000000..b7010a62aa
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/EstSequence_ix.psql
@@ -0,0 +1,7 @@
+
+
+ -- Index on (source_id, project_id).
+ CREATE INDEX EstSeq_ix
+     ON EstSequence (source_id, project_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneAttributes.psql b/Model/lib/psql/webtables/MO/GeneAttributes.psql
new file mode 100644
index 0000000000..8c78e847c9
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneAttributes.psql
@@ -0,0 +1,113 @@
+:CREATE_AND_POPULATE
+ -- GeneAttributes - gene-level rows derived from the transcript attributes table by SELECT DISTINCT,
+ -- joined to per-gene aggregates, UniProt cross-references and the gene product table.
+ CREATE TABLE :ORG_ABBREVGeneAttributes AS
+ SELECT DISTINCT project_id
+ , ta.gene_source_id AS source_id
+ , gene_na_feature_id AS na_feature_id
+ , na_sequence_id
+ , is_reversed
+ , gene_start_min AS start_min
+ , gene_end_max AS end_max
+ , CASE strand WHEN 'forward' THEN '+' WHEN 'reverse' THEN '-' ELSE null END as strand_plus_minus
+ , sequence_id
+ , gene_name AS name
+ , COALESCE(aggregates.product, aggregates.transcript_product) as old_product -- gene product, else the joined transcript products
+ , COALESCE(gp.product, 'unspecified product') as product
+ , gene_type
+ , gene_ebi_biotype
+ , gene_id
+ , aggregates.is_pseudo
+ , organism
+ , species
+ , taxon_id
+ , species as genus_species
+ , strain
+ , ncbi_tax_id
+ , so_id
+ , so_term_name
+ , so_term_definition
+ , so_version
+ , anticodon
+ , external_db_name
+ , external_db_version
+ , external_db_rls_id
+ , chromosome
+ , chromosome_order_num
+ , sequence_type
+ , gene_transcript_count AS transcript_count
+ , gene_exon_count as exon_count
+ , gene_previous_ids as previous_ids
+ , is_deprecated
+ , gene_paralog_number as paralog_number
+ , gene_ortholog_number as ortholog_number
+ , gene_context_start as context_start
+ , gene_context_end as context_end
+ , orthomcl_name
+ , gene_total_hts_snps as total_hts_snps
+ , gene_hts_nonsynonymous_snps as hts_nonsynonymous_snps
+ , gene_hts_stop_codon_snps as hts_stop_codon_snps
+ , gene_hts_noncoding_snps as hts_noncoding_snps
+ , gene_hts_synonymous_snps as hts_synonymous_snps
+ , gene_hts_nonsyn_syn_ratio as hts_nonsyn_syn_ratio
+ , comment_string
+ , uniprot.uniprot_id
+ , uniprot.uniprot_id_internal
+ , gene_entrez_id as entrez_id
+ , representative_transcript
+ , gene_zoom_context_start as zoom_context_start
+ , gene_zoom_context_end as zoom_context_end
+ , cast (null as numeric) as strain_count -- typed NULL placeholder, filled by the UPDATE later in this file
+ , ta.gene_locations as locations
+ FROM :ORG_ABBREVTranscriptAttributes ta
+ INNER JOIN ( -- per-gene minimum is_pseudo and gene_product, plus transcript products joined with commas (first 240 characters)
+ SELECT gene_source_id, MIN(is_pseudo) AS is_pseudo, MIN(gene_product) AS product,
+ substr(STRING_AGG(transcript_product, ',' order by transcript_product), 1, 240) as transcript_product
+ FROM :ORG_ABBREVTranscriptAttributes
+ GROUP BY gene_source_id
+ ) aggregates ON ta.gene_source_id = aggregates.gene_source_id
+ LEFT JOIN ( -- per-gene UniProt ids from the listed dbxref databases, as a comma list and as a +or+ list
+ SELECT na_feature_id,
+ substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id,
+ substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal
+ FROM (
+ SELECT distinct t.parent_id as na_feature_id, dr.primary_identifier as uniprot_id
+ FROM sres.DbRef dr, dots.DbRefNaFeature x, dots.Transcript t,
+ sres.ExternalDatabase d, sres.ExternalDatabaseRelease r
+ WHERE dr.db_ref_id = x.DB_REF_ID
+ AND (x.na_feature_id = t.na_feature_id OR x.na_feature_id = t.parent_id)
+ AND dr.external_database_release_id = r.external_database_release_id
+ AND r.external_database_id = d.external_database_id
+ AND (d.name like '%uniprot_dbxref_RSRC'
+ OR d.name like '%dbxref_gene2Uniprot_RSRC'
+ OR d.name = 'Links to Uniprot Genes'
+ OR d.name like '%_dbxref_uniprot_from_annotation_RSRC')
+ ) t
+ GROUP BY na_feature_id
+ ) uniprot ON ta.gene_na_feature_id = uniprot.na_feature_id
+ LEFT JOIN :ORG_ABBREVGeneProduct gp ON ta.gene_source_id = gp.source_id
+ ORDER BY ta.gene_source_id
+
+
+:DECLARE_PARTITION;
+
+
+
+ -- Number of distinct organism values recorded for each genus_species.
+ CREATE TABLE :ORG_ABBREVSpeciesInfo AS
+ SELECT ga.genus_species, COUNT(DISTINCT ga.organism) AS strain_count
+ FROM :ORG_ABBREVGeneAttributes ga
+ GROUP BY ga.genus_species
+ ;
+
+
+
+ -- Fills strain_count on every gene row from the per-species counts (deliberately no WHERE clause).
+ UPDATE :ORG_ABBREVGeneAttributes ga
+ SET strain_count = (
+     SELECT sp.strain_count
+     FROM :ORG_ABBREVSpeciesInfo sp
+     WHERE sp.genus_species = ga.genus_species
+ )
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
new file mode 100644
index 0000000000..a63551450d
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
@@ -0,0 +1,90 @@
+
+
+ -- Enforces one row per gene source_id.
+ CREATE UNIQUE INDEX GeneAttr_srcPrj ON :ORG_ABBREVGeneAttributes (
+     source_id
+ )
+ ;
+
+
+
+ -- Index led by exon_count.
+ CREATE INDEX GeneAttr_exon_ix ON :ORG_ABBREVGeneAttributes (
+     exon_count, source_id, project_id
+ )
+ ;
+
+
+
+ -- Location index led by na_sequence_id and the gene coordinates.
+ CREATE INDEX GeneAttr_loc_ix ON :ORG_ABBREVGeneAttributes (
+     na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated
+ )
+ ;
+
+
+
+ -- Index led by na_feature_id, followed by the location columns.
+ CREATE INDEX GeneAttr_feat_ix ON :ORG_ABBREVGeneAttributes (
+     na_feature_id, na_sequence_id, start_min, end_max, is_reversed
+ )
+ ;
+
+
+
+ -- Wide index led by orthomcl_name.
+ CREATE INDEX GeneAttr_orthoname_ix
+     ON :ORG_ABBREVGeneAttributes
+     (orthomcl_name, source_id, taxon_id, gene_type, na_feature_id,
+      na_sequence_id, start_min, end_max, organism, species,
+      product, project_id)
+ ;
+
+
+
+ -- Index led by source_id, with location columns and orthomcl_name.
+ CREATE INDEX GeneAttr_ortholog_ix ON :ORG_ABBREVGeneAttributes (
+     source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id
+ )
+ ;
+
+
+
+ -- Index led by organism, then source_id and location.
+ CREATE INDEX GeneAttr_orgsrc_ix ON :ORG_ABBREVGeneAttributes (
+     organism, source_id, na_sequence_id, start_min, end_max
+ )
+ ;
+
+
+
+ -- Index led by project_id, ending in an expression that treats a NULL is_deprecated as 0.
+ CREATE INDEX GeneAttr_prjsrc_ix ON :ORG_ABBREVGeneAttributes (
+     project_id, organism, source_id, coalesce(IS_DEPRECATED,0)
+ )
+ ;
+
+
+
+ -- Index led by taxon_id.
+ CREATE INDEX GeneAttr_txid_ix ON :ORG_ABBREVGeneAttributes (
+     taxon_id, source_id, gene_type, na_feature_id, project_id
+ )
+ ;
+
+
+
+ -- Index on the identifier columns, led by na_feature_id.
+ CREATE INDEX GeneAttr_ids_ix ON :ORG_ABBREVGeneAttributes (
+     na_feature_id, source_id, project_id
+ )
+ ;
+
+
+
+ -- Location index ordered sequence, start, strand flag, end.
+ CREATE INDEX GeneAttr_loc_intjunc_ix ON :ORG_ABBREVGeneAttributes (
+     NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX
+ )
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql
new file mode 100644
index 0000000000..ac518347bf
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql
@@ -0,0 +1,29 @@
+:CREATE_AND_POPULATE
+
+ -- Copy number per protein-coding transcript and PANIo output node, with a bucketed haploid number.
+ CREATE TABLE GeneCopyNumbers AS
+ SELECT DISTINCT
+     ta.project_id,
+     ta.source_id,
+     ta.gene_source_id,
+     REGEXP_REPLACE(pan.name, '_[A-Za-z0-9]+ (.+)$', '') AS strain,
+     gcn.haploid_number AS raw_estimate,
+     gcn.ref_copy_number AS ref_cn,
+     CASE WHEN (gcn.haploid_number < 0.01) THEN 0
+          WHEN (0.01 < gcn.haploid_number AND gcn.haploid_number < 1.85) THEN 1
+          ELSE round(gcn.haploid_number) END AS haploid_number,
+     ta.chromosome,
+     ta.na_sequence_id,
+     io.input_pan_id,
+     io.output_pan_id
+ FROM apidb.genecopynumber gcn
+ INNER JOIN study.protocolappnode pan
+     ON pan.protocol_app_node_id = gcn.protocol_app_node_id
+ INNER JOIN TranscriptAttributes ta
+     ON ta.gene_na_feature_id = gcn.na_feature_id
+ INNER JOIN PANIo io
+     ON io.output_pan_id = gcn.protocol_app_node_id
+ WHERE ta.gene_type IN ('protein coding', 'protein coding gene')
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
new file mode 100644
index 0000000000..7acf69d823
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
@@ -0,0 +1,8 @@
+
+
+ -- Index on (input_pan_id, na_sequence_id).
+ CREATE INDEX GeneCN_ix ON GeneCopyNumbers (
+     input_pan_id, na_sequence_id
+ )
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneGoTable.psql b/Model/lib/psql/webtables/MO/GeneGoTable.psql
new file mode 100644
index 0000000000..b83c188e7f
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneGoTable.psql
@@ -0,0 +1,26 @@
+:CREATE_AND_POPULATE
+
+ -- One row per gene and GO annotation, with the annotated transcript ids collapsed into a comma list.
+ CREATE TABLE GeneGoTable AS
+ SELECT terms.source_id, terms.project_id, terms.go_id,
+     STRING_AGG(terms.transcript_source_id, ', ' ORDER BY terms.transcript_source_id) AS transcript_ids,
+     terms.is_not,
+     MAX(terms.go_term_name) AS go_term_name, terms.ontology, terms.source, terms.evidence_code,
+     terms.reference, terms.evidence_code_parameter, terms.sort_key
+ FROM (SELECT DISTINCT ggt.gene_source_id AS source_id, ga.project_id,
+         REPLACE(ggt.go_id, 'GO_', 'GO:') AS go_id,
+         ggt.transcript_source_id,
+         CASE WHEN ggt.is_not = 'not' THEN 'Is not' ELSE '' END AS is_not,
+         ggt.go_term_name, ggt.ontology, ggt.source, ggt.evidence_code,
+         ggt.reference, ggt.evidence_code_parameter,
+         SUBSTR(ggt.ontology, 1, 1) || REPLACE(ggt.go_id, 'GO_', 'GO:') AS sort_key
+     FROM GeneGoTerms ggt
+     INNER JOIN GeneAttributes ga ON ggt.gene_source_id = ga.source_id
+     ) terms
+ GROUP BY terms.source_id, terms.project_id, terms.go_id, terms.is_not, terms.ontology,
+     terms.source, terms.evidence_code, terms.reference, terms.evidence_code_parameter, terms.sort_key
+ ORDER BY terms.source_id, terms.ontology, terms.go_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
new file mode 100644
index 0000000000..de449a0e54
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
@@ -0,0 +1,9 @@
+
+
+ -- Index spanning every GeneGoTable column, led by source_id.
+ CREATE INDEX ggtab_ix
+     ON GeneGoTable
+     (source_id, project_id, go_id, transcript_ids, is_not, go_term_name,
+      ontology, source, evidence_code, reference, evidence_code_parameter, sort_key)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms.psql b/Model/lib/psql/webtables/MO/GeneGoTerms.psql
new file mode 100644
index 0000000000..4cd39239a2
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneGoTerms.psql
@@ -0,0 +1,39 @@
+:CREATE_AND_POPULATE
+ -- GeneGoTerms - one row per GO association evidence record of a translated protein, with its
+ -- gene, transcript, GO term, root ontology name, source, evidence code and reference.
+ create table :ORG_ABBREVGeneGoTerms as
+ with root_term
+ as (select ontology_term_id,
+ cast(initcap(replace(name, '_', ' ')) as varchar(20)) as ontology
+ from sres.OntologyTerm
+ where source_id in ('GO_0008150','GO_0003674','GO_0005575')) -- the three terms whose names become the ontology column
+ select gf.source_id as gene_source_id, t.source_id as transcript_source_id, taf.aa_sequence_id,
+ cast (CASE ga.is_not WHEN 0 THEN '' WHEN 1 THEN 'not' ELSE ga.is_not::varchar END as varchar(3)) as is_not, ns.taxon_id,
+ cast (gt.source_id as varchar(20)) as go_id,
+ gt.ontology_term_id as go_term_id, rt.ontology,
+ cast(gt.name as varchar(250)) as go_term_name,
+ cast(gail.name as varchar(24)) as source,
+ cast(gec.name as varchar(12)) as evidence_code,
+ cast(gaiec.reference as varchar(250)) as reference,
+ cast(gaiec.evidence_code_parameter as varchar(80))as evidence_code_parameter
+ from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf, dots.GoAssociation ga,
+ dots.GoAssociationInstance gai, dots.GoAssociationInstanceLoe gail,
+ dots.GoAssocInstEvidCode gaiec, sres.OntologyTerm gec, dots.NaSequence ns,
+ sres.OntologyTerm gt LEFT JOIN root_term rt ON gt.ancestor_term_id = rt.ontology_term_id -- ontology stays NULL when the ancestor is not one of the three root terms
+ where t.parent_id = gf.na_feature_id
+ and gf.na_sequence_id = ns.na_sequence_id
+ and (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0) -- presumably the placeholder is substituted before execution, and an empty value disables the taxon filter (confirm)
+ and t.na_feature_id = taf.na_feature_id
+ and taf.aa_sequence_id = ga.row_id
+ and ga.table_id = (select table_id
+ from core.TableInfo
+ where name = 'TranslatedAASequence')
+ and ga.go_term_id = gt.ontology_term_id
+ and ga.go_association_id = gai.go_association_id
+ and gai.go_assoc_inst_loe_id = gail.go_assoc_inst_loe_id
+ and gai.go_association_instance_id = gaiec.go_association_instance_id
+ and gaiec.go_evidence_code_id = gec.ontology_term_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql
new file mode 100644
index 0000000000..8bb63eb7e4
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql
@@ -0,0 +1,10 @@
+
+
+ -- Wide index led by gene and transcript source ids.
+ CREATE INDEX ggt_ix
+     ON :ORG_ABBREVGeneGoTerms
+     (gene_source_id, transcript_source_id, ontology, go_id, go_term_id,
+      go_term_name, source, evidence_code, reference,
+      evidence_code_parameter, aa_sequence_id, is_not)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneId.psql b/Model/lib/psql/webtables/MO/GeneId.psql
new file mode 100644
index 0000000000..41e89c83f0
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneId.psql
@@ -0,0 +1,265 @@
+:CREATE_AND_POPULATE
+  -- GeneId maps every identifier or alias found for a gene (id) to that gene's source_id (gene).
+  -- Rows are grouped by (id, gene); union_member / database_name list the contributing branches.
+  CREATE TABLE :ORG_ABBREVGeneId AS
+  SELECT substr(mapping.id, 1, 100) as id, mapping.gene, cast (0 as NUMERIC(1)) as unique_mapping,
+         SUBSTR(string_agg(distinct union_member,'; ' order by union_member), 1, 100) as union_member,
+         SUBSTR(string_agg(distinct database_name,'; ' order by database_name), 1, 200) as database_name
+  FROM (SELECT substr(t.protein_id, 1, nullif(position('.' IN t.protein_id) - 1, -1)) AS id,
+               gf.source_id AS gene,
+               'Transcript.protein_id before dot' as union_member, ed.name as database_name /* dots.Transcript.protein_id, trimmed at period */
+        FROM dots.Transcript t, dots.GeneFeature gf,
+             sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+        WHERE t.parent_id = gf.na_feature_id
+          AND substr(t.protein_id, 1, nullif(position('.' IN t.protein_id) - 1, -1)) IS NOT NULL
+          AND gf.external_database_release_id = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+        UNION
+        SELECT t.protein_id AS id,
+               gf.source_id AS gene,
+               'Transcript.protein_id' as union_member, ed.name as database_name /* dots.Transcript.protein_id */
+        FROM dots.Transcript t, dots.GeneFeature gf,
+             sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+        WHERE t.parent_id = gf.na_feature_id
+          AND t.protein_id IS NOT NULL
+          AND gf.external_database_release_id = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+        UNION
+        SELECT dr.primary_identifier AS id,
+               gf.source_id AS gene,
+               'DbRef.primary_identifier' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */
+        FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf,
+             sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+             sres.ExternalDatabase ed
+        WHERE dr.primary_identifier IS NOT NULL
+          AND gf.na_feature_id = drnf.na_feature_id
+          AND drnf.db_ref_id = dr.db_ref_id
+          AND dr.external_database_release_id
+              = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+          -- skip NRDB gb/ref cross-references only when the identifier is purely numeric
+          -- (PostgreSQL port of the former REGEXP_LIKE(dr.primary_identifier, '\D') test)
+          AND NOT (ed.name in ('NRDB_gb_dbXRefBySeqIdentity','NRDB_ref_dbXRefBySeqIdentity')
+                   AND dr.primary_identifier !~ '\D')
+          AND NOT edr.id_type = 'synonym' -- NOTE(review): also drops rows whose id_type is NULL; confirm intended
+        UNION
+        SELECT dr.primary_identifier AS id,
+               gf.source_id AS gene,
+               'DbRef.primary_identifier on Transcript' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */
+        FROM dots.GeneFeature gf, dots.Transcript t, dots.DbRefNaFeature drnf,
+             sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+             sres.ExternalDatabase ed
+        WHERE dr.primary_identifier IS NOT NULL
+          AND gf.na_feature_id = t.parent_id
+          AND t.na_feature_id = drnf.na_feature_id
+          AND drnf.db_ref_id = dr.db_ref_id
+          AND dr.external_database_release_id
+              = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+          -- skip NRDB gb/ref cross-references only when the identifier is purely numeric
+          -- (PostgreSQL port of the former REGEXP_LIKE(dr.primary_identifier, '\D') test)
+          AND NOT (ed.name in ('NRDB_gb_dbXRefBySeqIdentity','NRDB_ref_dbXRefBySeqIdentity')
+                   AND dr.primary_identifier !~ '\D')
+        UNION
+        SELECT dr.primary_identifier AS id,
+               gf.source_id AS gene,
+               'DbRef.primary_identifier on Gene' as union_member, ed.name as database_name
+        FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf,
+             sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+             sres.ExternalDatabase ed
+        WHERE dr.primary_identifier IS NOT NULL
+          AND gf.na_feature_id = drnf.na_feature_id
+          AND drnf.db_ref_id = dr.db_ref_id
+          AND dr.external_database_release_id = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+          AND ed.name ='RefSeq_gene_name'
+        UNION
+        SELECT dr.primary_identifier AS id,
+               gf.source_id AS gene,
+               'VectorBase alternate names' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */
+        FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf,
+             sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+             sres.ExternalDatabase ed
+        WHERE dr.primary_identifier IS NOT NULL
+          AND gf.na_feature_id = drnf.na_feature_id
+          AND drnf.db_ref_id = dr.db_ref_id
+          AND dr.external_database_release_id
+              = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+          and ed.name in ('VB_Community_Annotation', 'VB_Community_Symbol') /* vectorbase alt names */
+        UNION
+        SELECT dr.primary_identifier AS id,
+               gf.source_id AS gene,
+               'synonym' as union_member, ed.name as database_name
+        FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf,
+             sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+             sres.ExternalDatabase ed
+        WHERE dr.primary_identifier IS NOT NULL
+          AND gf.na_feature_id = drnf.na_feature_id
+          AND drnf.db_ref_id = dr.db_ref_id
+          AND dr.external_database_release_id = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+          AND edr.id_type = 'synonym'
+        UNION
+        SELECT dr.secondary_identifier AS id,
+               gf.source_id AS gene,
+               'DbRef.secondary_identifier' as union_member, ed.name as database_name /* sres.DbRef.secondary_identifier */
+        FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf,
+             sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+             sres.ExternalDatabase ed
+        WHERE dr.secondary_identifier IS NOT NULL
+          AND gf.na_feature_id = drnf.na_feature_id
+          AND drnf.db_ref_id = dr.db_ref_id
+          AND dr.external_database_release_id
+              = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+          AND ed.name IN ('NRDB_gb_dbXRefBySeqIdentity',
+                          'NRDB_pdb_dbXRefBySeqIdentity',
+                          'NRDB_ref_dbXRefBySeqIdentity',
+                          'NRDB_sp_dbXRefBySeqIdentity',
+                          'Predicted protein structures','Pf_predictedProteinStructures_RSRC',
+                          'GenBank')
+        UNION
+        SELECT dr.primary_identifier AS id,
+               gf.source_id AS gene,
+               'genbank DbRef.primary_identifier' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier for Genbank records */
+        FROM dots.GeneFeature gf, dots.Transcript t, dots.DbRefNaSequence drns,
+             sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+             sres.ExternalDatabase ed
+        WHERE gf.na_feature_id = t.parent_id
+          AND t.na_sequence_id = drns.na_sequence_id
+          AND drns.db_ref_id = dr.db_ref_id
+          AND dr.external_database_release_id = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+          AND ed.name = 'GenBank'
+        UNION
+        SELECT pred_loc.feature_source_id AS id,
+               gene_loc.feature_source_id AS gene,
+               'overlapping predicted gene source_id' as union_member, ed.name as database_name /* dots.GeneFeature.source_id for predicted genes that overlap */
+        FROM apidb.FeatureLocation gene_loc, apidb.FeatureLocation pred_loc,
+             sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+        WHERE pred_loc.feature_type = 'GenePrediction'
+          AND gene_loc.feature_type = 'GeneFeature'
+          AND pred_loc.na_sequence_id = gene_loc.na_sequence_id
+          AND gene_loc.start_min <= pred_loc.end_max
+          AND gene_loc.end_max >= pred_loc.start_min
+          AND pred_loc.is_reversed = gene_loc.is_reversed
+          AND pred_loc.external_database_release_id = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+        UNION
+        SELECT ng.name AS id, gf.source_id AS gene,
+               'NaGene' as union_member, ed.name as database_name /* dots.NaGene.name */
+        FROM dots.GeneFeature gf, dots.NaFeatureNaGene nfng, dots.NaGene ng,
+             sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+        WHERE gf.na_feature_id = nfng.na_feature_id
+          AND ng.na_gene_id = nfng.na_gene_id
+          AND gf.external_database_release_id = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+        UNION
+        SELECT source_id AS id, source_id AS gene,
+               'same ID' as union_member, ed.name as database_name /* same ID (reflexive mapping) */
+        FROM dots.GeneFeature gf,
+             sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+        WHERE gf.external_database_release_id = edr.external_database_release_id
+          AND edr.external_database_id = ed.external_database_id
+        UNION
+        SELECT n.name AS id, gf.source_id AS gene,
+               'gene name' as union_member, d.name as database_name -- apidb.GeneFeatureName.name
+        from dots.genefeature gf, sres.ExternalDatabaseRelease r, sres.ExternalDatabase d,
+             ( select na_feature_id, name
+               from apidb.GeneFeatureName
+               where is_preferred = 1
+               EXCEPT
+               -- suppress gene/name associations from the *DELETED_RSRC databases
+               select gfn.na_feature_id, gfn.name
+               from apidb.GeneFeatureName gfn,
+                    sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+               where gfn.external_database_release_id = edr.external_database_release_id
+                 and ed.external_database_id = edr.external_database_id
+                 and ed.name like '%DELETED_RSRC'
+             ) n
+        where n.na_feature_id = gf.na_feature_id
+          and gf.external_database_release_id = r.external_database_release_id
+          and r.external_database_id = d.external_database_id
+        UNION
+        select dr.primary_identifier as id,
+               gf.source_id as gene,
+               'AA feature DbRef primary ID' as union_member,
+               ed.name as database_name /* DbRef.primary_identifier mapped through DbRefAaFeature */
+        from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf,
+             dots.DbRefAaFeature draf, sres.DbRef dr,
+             sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+        where gf.na_feature_id = t.parent_id
+          and t.na_feature_id = taf.na_feature_id
+          and taf.aa_feature_id = draf.aa_feature_id
+          and draf.db_ref_id = dr.db_ref_id
+          and dr.external_database_release_id = edr.external_database_release_id
+          and edr.external_database_id = ed.external_database_id
+          and ed.name
+              not in ('INTERPRO', 'PFAM', 'PIRSF', 'PRODOM', 'PROSITEPROFILES',
+                      'SMART', 'SUPERFAMILY', 'TIGRFAM', 'CDD','HAMAP','HMMPANTHER',
+                      'PRINTS','SCANPROSITE','SFLD')
+       ) mapping,
+       dots.GeneFeature gf, dots.NaSequence ns
+  WHERE mapping.gene = gf.source_id
+    AND gf.na_sequence_id = ns.na_sequence_id
+    AND (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0)
+    AND (gf.is_predicted != 1 OR gf.is_predicted is null)
+  GROUP BY mapping.id, mapping.gene
+
+
+:DECLARE_PARTITION;
+
+
+
+  -- Add a 'base ID' row for each ID carrying a ".N"/".NN" suffix, unless the stripped ID already exists.
+  INSERT INTO :ORG_ABBREVGeneId
+              (id, gene, unique_mapping, union_member, database_name)
+  WITH munge AS
+       (SELECT DISTINCT regexp_replace(id, '\.\d\d?$', '') as id,
+               gene, unique_mapping, union_member, database_name
+        FROM :ORG_ABBREVGeneId
+        -- only suffixed IDs can produce a new base ID (port of the former regexp_like filter)
+        WHERE id ~ '\.\d\d?$'
+       )
+  SELECT m.id, m.gene, 0 as unique_mapping, 'base ID' as union_member, m.database_name
+  FROM munge m
+  -- NOT EXISTS rather than NOT IN: a single NULL id in the table makes NOT IN reject every row
+  WHERE NOT EXISTS (SELECT 1 FROM :ORG_ABBREVGeneId g WHERE g.id = m.id)
+  ;
+
+
+
+  -- Lower-cased IDs that occur with exactly one distinct gene value in GeneId.
+  CREATE UNLOGGED TABLE :ORG_ABBREVOneGeneIds (lower_id) AS
+  WITH id_gene AS (
+    SELECT DISTINCT lower(id) AS lower_id, gene FROM :ORG_ABBREVGeneId
+  )
+  SELECT lower_id
+  FROM id_gene
+  GROUP BY lower_id HAVING count(*) = 1
+  ;
+
+
+
+  -- OneGeneIds holds one row per lower_id (it is built with GROUP BY lower_id), so the index is unique.
+  CREATE UNIQUE INDEX gix_pk ON :ORG_ABBREVOneGeneIds (lower_id)
+
+  ;
+
+
+
+  -- Flag unique mappings: first the reflexive rows, where the ID equals the gene value ...
+  UPDATE :ORG_ABBREVGeneId
+     SET unique_mapping = 1
+   WHERE gene = id
+  ;
+
+  -- ... then every row whose lower-cased ID is listed in OneGeneIds.
+  UPDATE :ORG_ABBREVGeneId gid
+     SET unique_mapping = 1
+   WHERE EXISTS (SELECT 1
+                 FROM :ORG_ABBREVOneGeneIds og
+                 WHERE og.lower_id = lower(gid.id))
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneId_ix.psql b/Model/lib/psql/webtables/MO/GeneId_ix.psql
new file mode 100644
index 0000000000..ef84acad08
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneId_ix.psql
@@ -0,0 +1,35 @@
+
+
+  -- gene first, then ID
+  CREATE INDEX GeneId_gene_idx
+    ON :ORG_ABBREVGeneId (gene, id)
+  ;
+
+
+
+  -- ID first, then gene
+  CREATE INDEX GeneId_id_idx
+    ON :ORG_ABBREVGeneId (id, gene)
+  ;
+
+
+
+  -- same as GeneId_id_idx but led by the unique_mapping flag
+  CREATE INDEX GeneId_uniqid_idx
+    ON :ORG_ABBREVGeneId (unique_mapping, id, gene)
+  ;
+
+
+
+  -- expression index on the lower-cased ID
+  CREATE INDEX GeneId_lowid_idx
+    ON :ORG_ABBREVGeneId (lower(id), gene)
+  ;
+
+
+
+  -- lower-cased ID index led by the unique_mapping flag
+  CREATE INDEX GeneId_uniqlowid_idx
+    ON :ORG_ABBREVGeneId (unique_mapping, lower(id), gene)
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats.psql b/Model/lib/psql/webtables/MO/GeneIntJuncStats.psql
new file mode 100644
index 0000000000..b8a8e7dbcf
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneIntJuncStats.psql
@@ -0,0 +1,25 @@
+:CREATE_AND_POPULATE
+  -- Per-sequence copy of organism-wide statistics over annotated intron junctions:
+  -- minimum and low percentiles of total_unique / percent_max, and a padded maximum intron length.
+  CREATE TABLE GeneIntJuncStats AS
+  WITH org_tot AS (
+    SELECT gs.organism,
+           min(gij.total_unique) AS min_annot_score,
+           percentile_cont(0.005) WITHIN GROUP (ORDER BY gij.total_unique ASC) AS perc005_annot_score,
+           percentile_cont(0.01) WITHIN GROUP (ORDER BY gij.total_unique ASC) AS perc01_annot_score,
+           min(gij.percent_max) AS min_annot_percent_max,
+           percentile_cont(0.0001) WITHIN GROUP (ORDER BY gij.percent_max ASC) AS perc0001_annot_percent_max,
+           percentile_cont(0.0005) WITHIN GROUP (ORDER BY gij.percent_max ASC) AS perc0005_annot_percent_max,
+           floor(max(gij.segment_end - gij.segment_start) * 1.25) AS max_intron_length
+    FROM geneintronjunction gij
+    INNER JOIN genomicseqattributes gs ON gs.na_sequence_id = gij.na_sequence_id
+    WHERE gij.annotated_intron = 'Yes'
+    GROUP BY gs.organism
+  )
+  SELECT gs.na_sequence_id, gs.source_id, ot.*
+  FROM genomicseqattributes gs
+  INNER JOIN org_tot ot ON ot.organism = gs.organism
+  WHERE EXISTS (SELECT 1 FROM apidb.intronjunction ij WHERE ij.na_sequence_id = gs.na_sequence_id)
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
new file mode 100644
index 0000000000..d2d463f8a2
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
@@ -0,0 +1,6 @@
+
+
+  -- index on the sequence key of GeneIntJuncStats
+  CREATE INDEX GeneIntJuncStat_ix ON GeneIntJuncStats (na_sequence_id)
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction.psql b/Model/lib/psql/webtables/MO/GeneIntronJunction.psql
new file mode 100644
index 0000000000..1f42b544b4
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneIntronJunction.psql
@@ -0,0 +1,223 @@
+
+
+  -- Input/output node pairs of each protocol application whose output node is named
+  -- like '%junctions%' or '%htseq%'.
+  CREATE UNLOGGED TABLE PanIOgij AS
+  SELECT i.protocol_app_id,
+         i.protocol_app_node_id AS input_pan_id,
+         o.protocol_app_node_id AS output_pan_id
+  FROM study.output o
+  INNER JOIN study.input i ON i.protocol_app_id = o.protocol_app_id
+  WHERE o.protocol_app_node_id IN (SELECT pan.protocol_app_node_id FROM study.protocolappnode pan
+                                   WHERE pan.name LIKE '%junctions%' OR pan.name LIKE '%htseq%')
+  ;
+
+
+
+  -- Distinct intron locations from apidb.IntronLocation, each tagged with feature_type 'Intron'.
+  CREATE UNLOGGED TABLE annotgij
+    (na_sequence_id, start_min, end_max, is_reversed, feature_type)
+  AS
+  SELECT DISTINCT
+         il.na_sequence_id,
+         il.start_min,
+         il.end_max,
+         il.is_reversed,
+         'Intron' AS feature_type
+  FROM apidb.IntronLocation il
+  ;
+
+
+
+  -- every location occurs once in annotgij, so the index over the full row is unique
+  CREATE UNIQUE INDEX annottmpnew_pk_ix
+    ON annotgij (na_sequence_id, start_min, end_max, is_reversed, feature_type)
+  ;
+
+
+
+  -- Gene locations with a summed expression value; populated by the DO block that follows.
+  CREATE TABLE GeneIdLocGIJ (
+    na_sequence_id   numeric(10),
+    start_min        numeric,
+    is_reversed      numeric,
+    end_max          numeric,
+    na_feature_id    numeric(10),
+    source_id        varchar(100),
+    total_expression numeric
+  )
+  ;
+
+
+
+  -- unique index spanning every column of the table
+  CREATE UNIQUE INDEX gnattidloc_pk_ix
+    ON GeneIdLocGIJ (na_sequence_id, start_min, is_reversed, end_max, na_feature_id, source_id, total_expression)
+  ;
+
+ -- Fill GeneIdLocGIJ one sequence at a time: for each sequence present in apidb.intronjunction, insert
+ -- the locations of its genes together with the gene's summed expression value (rounded to 2 places).
+ DO $$
+ DECLARE
+ idlist RECORD;
+ BEGIN
+ FOR idlist IN ( SELECT DISTINCT na_sequence_id FROM apidb.intronjunction)
+ LOOP
+ INSERT INTO GeneIdLocGIJ (
+ SELECT gf.na_sequence_id,l.start_min,l.is_reversed,l.end_max,gf.na_feature_id,gf.source_id,
+ round(sum(nafe.value)::NUMERIC,2) as total_expression
+ FROM dots.genefeature gf, dots.nalocation l, namemappinggij je, results.nafeatureexpression nafe
+ WHERE gf.na_sequence_id = idlist.na_sequence_id
+ AND l.na_feature_id = gf.na_feature_id
+ AND gf.na_feature_id = nafe.na_feature_id
+ AND nafe.protocol_app_node_id = je.exp_pan_id -- only expression rows whose node is listed in namemappinggij.exp_pan_id
+ GROUP BY gf.na_sequence_id,l.start_min,l.is_reversed,l.end_max,gf.na_feature_id,gf.source_id
+ );
+ commit; -- one commit per sequence
+ END LOOP;
+ END;
+ $$ LANGUAGE PLPGSQL;
+
+ ;
+
+
+
+  -- index on the gene feature key of GeneIdLocGIJ
+  CREATE INDEX gnidloc_nafid_ix ON GeneIdLocGIJ (na_feature_id)
+  ;
+
+
+
+  -- Staging table for aggregated intron junctions; populated by the DO block that follows.
+  CREATE UNLOGGED TABLE GIJtmp (
+    na_sequence_id      numeric(10),
+    sequence_source_id  varchar(100),
+    segment_start       numeric,
+    segment_end         numeric,
+    total_unique        numeric,
+    total_isrpm         numeric,
+    is_reversed         numeric(1),
+    intron_feature_id   varchar(200),
+    matches_gene_strand numeric,
+    gene_source_id      varchar(100),
+    gene_na_feature_id  numeric,
+    annotated_intron    varchar(10)
+  )
+  ;
+
+ -- Fill GIJtmp: for every sequence that has intron junctions, step along the sequence in windows of
+ -- segment_start positions and insert the aggregated junctions of each window, committing per window.
+ DO $$
+ DECLARE
+ iter_length numeric := 4999;
+ i_first_pos numeric := 1;
+ i_last_pos numeric := i_first_pos + iter_length;
+ idlist RECORD;
+ BEGIN
+ FOR idlist IN (
+ SELECT na_sequence_id, length, taxon_id, CASE WHEN step_mult > 500000 THEN 500000 ELSE step_mult END as seq_step_mult
+ FROM (
+ SELECT gs.na_sequence_id, gs.length, gs.taxon_id, 25000 * (1 + floor(gs.length/count(*))) as step_mult
+ FROM apidb.intronjunction ij, dots.nasequence gs
+ WHERE gs.na_sequence_id = ij.na_sequence_id
+ GROUP BY gs.na_sequence_id, gs.length, gs.taxon_id
+ ) t
+ ORDER BY taxon_id
+ )
+ LOOP
+ iter_length := idlist.seq_step_mult; -- window size for this sequence (capped at 500000 by the query above)
+ i_first_pos := 1;
+ i_last_pos := i_first_pos + iter_length;
+ WHILE i_first_pos < idlist.length
+ LOOP
+ INSERT INTO GIJtmp
+ SELECT DISTINCT
+ junc.*,
+ CASE
+ WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed
+ THEN 1
+ ELSE 0
+ END as matches_gene_strand,
+ last_value(ga.source_id) over w1 as gene_source_id,
+ last_value(ga.na_feature_id) over w1 as gene_na_feature_id,
+ CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron
+ FROM (
+ SELECT ij.na_sequence_id,seq.source_id as sequence_source_id,ij.segment_start,ij.segment_end,
+ sum(ij.unique_reads) as total_unique, round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm,
+ ij.is_reversed,seq.source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id
+ FROM apidb.intronjunction ij, namemappinggij je, dots.nasequence seq
+ WHERE ij.na_sequence_id = idlist.na_sequence_id
+ AND ij.segment_start between i_first_pos and i_last_pos
+ AND ij.na_sequence_id = seq.na_sequence_id
+ AND ij.unique_reads >= 1
+ AND je.junctions_pan_id = ij.protocol_app_node_id
+ AND je.multiplier < 20
+ GROUP BY ij.na_sequence_id,ij.segment_start,ij.segment_end, ij.is_reversed,seq.source_id
+ ) junc
+ LEFT JOIN GeneIdLocGIJ ga ON -- genes on the same strand whose span contains the junction
+ junc.na_sequence_id = ga.na_sequence_id
+ AND junc.segment_start >= ga.start_min
+ AND junc.segment_end <= ga.end_max
+ AND junc.is_reversed = ga.is_reversed
+ LEFT JOIN annotgij ag ON -- annotated intron with exactly the same coordinates and strand
+ junc.na_sequence_id = ag.na_sequence_id
+ AND junc.segment_start = ag.start_min
+ AND junc.segment_end = ag.end_max
+ AND junc.is_reversed = ag.is_reversed
+ WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron')
+ WINDOW w1 AS (
+ PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type
+ ORDER BY ga.total_expression ASC -- last_value() over this full frame takes the gene row that sorts last
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ ;
+ commit; -- one commit per window
+ i_first_pos := i_last_pos + 1; -- the next window starts right after the previous one (no overlap)
+ i_last_pos := i_first_pos + iter_length;
+ END LOOP;
+ END LOOP;
+ END;
+ $$ LANGUAGE PLPGSQL;
+
+ ;
+
+
+
+  -- index on the gene assigned to each staged junction
+  CREATE INDEX gijtmp_gnscid_ix ON gijtmp (gene_source_id)
+  ;
+
+:CREATE_AND_POPULATE
+  -- Final junction table: every gijtmp row plus its total_isrpm as a percentage of the highest
+  -- total_isrpm seen for the same gene; taxon_id and the up/downstream columns start out NULL.
+  CREATE TABLE GeneIntronJunction AS
+  WITH maxv AS (
+    SELECT gene_source_id, max(total_unique) AS max_unique, max(total_isrpm) AS max_isrpm
+    FROM gijtmp
+    WHERE gene_source_id IS NOT NULL
+    GROUP BY gene_source_id
+  )
+  SELECT junc.*,
+         CASE WHEN maxv.gene_source_id IS NOT NULL AND maxv.max_isrpm > 0
+              THEN round((junc.total_isrpm / maxv.max_isrpm) * 100, 2)
+         END AS percent_max,
+         CASE WHEN maxv.gene_source_id IS NULL THEN 0 ELSE 1 END AS contained,
+         CAST(NULL AS numeric(10)) AS taxon_id,
+         CAST(NULL AS numeric(10)) AS upstream_gene_id,
+         CAST(NULL AS numeric) AS upstream_distance,
+         CAST(NULL AS numeric(10)) AS downstream_gene_id,
+         CAST(NULL AS numeric) AS downstream_distance
+  FROM gijtmp junc
+  LEFT JOIN maxv ON junc.gene_source_id = maxv.gene_source_id
+:DECLARE_PARTITION;
+
+
+
+  -- Set taxon_id from the junction's sequence; the statement deliberately touches every row.
+  UPDATE GeneIntronJunction gij
+     SET taxon_id = (
+           SELECT ns.taxon_id
+           FROM dots.NaSequence ns
+           WHERE ns.na_sequence_id = gij.na_sequence_id)
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
new file mode 100644
index 0000000000..90f98ad9cf
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
@@ -0,0 +1,21 @@
+
+
+  -- junction location
+  CREATE INDEX gijnew_loc_ix ON GeneIntronJunction (na_sequence_id, segment_start, segment_end, is_reversed)
+  ;
+
+
+
+  -- junction feature ID
+  CREATE INDEX gijnew_gnscid_ix ON GeneIntronJunction (intron_feature_id)
+  ;
+
+
+
+  -- taxon-first location index that also carries the read totals and the annotation flag
+  CREATE INDEX gijnew_txnloc_ix
+    ON GeneIntronJunction (
+      taxon_id, na_sequence_id, segment_start, segment_end, is_reversed,
+      total_unique, total_isrpm, annotated_intron)
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneLocations.psql b/Model/lib/psql/webtables/MO/GeneLocations.psql
new file mode 100644
index 0000000000..aae59b60e7
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneLocations.psql
@@ -0,0 +1,22 @@
+:CREATE_AND_POPULATE
+  -- One row per gene: its locations formatted as "sequence:start..end(strand)", joined by '; '
+  -- and ordered by is_top_level descending.
+  CREATE TABLE :ORG_ABBREVGeneLocations AS
+  WITH gene_loc AS (
+    SELECT fl.feature_source_id AS source_id, fl.is_top_level,
+           fl.sequence_source_id
+             || ':' || trim(to_char(fl.start_min,'999,999,999'))
+             || '..' || trim(to_char(fl.end_max,'999,999,999'))
+             || '(' || CASE WHEN coalesce(fl.is_reversed, 0) = 0 THEN '+'
+                            WHEN fl.is_reversed = 1 THEN '-'
+                            ELSE fl.is_reversed::varchar END
+             || ')' AS location
+    FROM apidb.FeatureLocation fl
+    INNER JOIN dots.NaSequence ns ON ns.na_sequence_id = fl.na_sequence_id
+    WHERE fl.feature_type = 'GeneFeature'
+      AND (length(':TAXON_IDValue') = 0 OR ns.taxon_id::varchar = ':TAXON_IDValue')
+  )
+  SELECT source_id, string_agg(location, '; ' ORDER BY is_top_level DESC) AS locations
+  FROM gene_loc GROUP BY source_id
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
new file mode 100644
index 0000000000..80b09a0fa2
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
@@ -0,0 +1,8 @@
+
+
+  -- index on the gene ID together with its aggregated location string
+  CREATE INDEX gloc_ix
+    ON :ORG_ABBREVGeneLocations (source_id, locations)
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql
new file mode 100644
index 0000000000..0025d7d161
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql
@@ -0,0 +1,47 @@
+:CREATE_AND_POPULATE
+  -- Junction read statistics per (protocol_app_node_id, gene_source_id); rows are inserted
+  -- by the DO block that follows.
+  CREATE TABLE GeneMaxIntronGIJ (
+    protocol_app_node_id numeric(10),
+    gene_source_id       varchar(200),
+    max_unique           numeric,
+    max_isrpm            numeric,
+    sum_unique           numeric,
+    sum_isrpm            numeric,
+    avg_unique           numeric,
+    avg_isrpm            numeric
+  )
+
+
+:DECLARE_PARTITION;
+
+ -- Fill GeneMaxIntronGIJ one sequence at a time: for each sequence in GeneIdLocGIJ, aggregate the
+ -- junction reads (raw and multiplied by namemappinggij.multiplier) per protocol_app_node_id and gene.
+ DO $$
+ DECLARE
+ idlist RECORD;
+ BEGIN
+ FOR idlist IN (
+ SELECT DISTINCT na_sequence_id
+ FROM GeneIdLocGIJ
+ )
+ LOOP
+ INSERT INTO GeneMaxIntronGIJ (
+ SELECT j.protocol_app_node_id, ga.source_id, max(unique_reads) as max_unique, max(round(j.unique_reads * mult.multiplier,2)) as max_isrpm,
+ sum(unique_reads) as sum_unique, sum(round(j.unique_reads * mult.multiplier,2)) as sum_isrpm, avg(unique_reads) as avg_unique, avg(round(j.unique_reads * mult.multiplier,2)) as avg_isrpm
+ FROM apidb.intronjunction j, GeneIdLocGIJ ga, namemappinggij mult
+ WHERE ga.na_sequence_id = idlist.na_sequence_id
+ AND ga.na_sequence_id = j.na_sequence_id
+ AND ga.start_min <= j.segment_start -- junction lies inside the gene span ...
+ AND ga.end_max >= j.segment_end
+ AND ga.is_reversed = j.is_reversed -- ... and on the same strand
+ AND j.protocol_app_node_id = mult.junctions_pan_id
+ GROUP BY j.protocol_app_node_id, ga.source_id
+ );
+ commit; -- one commit per sequence
+ END LOOP;
+ END;
+ $$ LANGUAGE PLPGSQL;
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
new file mode 100644
index 0000000000..2a734e98a7
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
@@ -0,0 +1,6 @@
+
+
+  -- index on gene, then protocol_app_node_id
+  CREATE INDEX GnMxIntGIJ_ix ON GeneMaxIntronGIJ (gene_source_id, protocol_app_node_id)
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneModelDump.psql b/Model/lib/psql/webtables/MO/GeneModelDump.psql
new file mode 100644
index 0000000000..698e4fc44f
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneModelDump.psql
@@ -0,0 +1,32 @@
+:CREATE_AND_POPULATE
+  -- Gene-model segments (exons, UTRs, CDS, introns) per gene, each with the comma-separated,
+  -- sorted list of transcripts that have that segment.
+  CREATE TABLE GeneModelDump AS
+  WITH gm AS (
+    -- top-level sub-feature locations keyed by their parent feature; 'ExonFeature' is reported as 'Exon'
+    SELECT CASE WHEN el.feature_type = 'ExonFeature' THEN 'Exon' ELSE el.feature_type END AS type,
+           el.parent_id AS na_feature_id, el.start_min, el.end_max
+    FROM apidb.FeatureLocation el
+    WHERE el.is_top_level = 1
+      AND el.feature_type IN ('ExonFeature','five_prime_UTR', 'three_prime_UTR','CDS','Intron')
+  ),
+  seg AS (
+    SELECT DISTINCT ta.source_id AS transcript_id, ta.gene_source_id AS source_id, ta.project_id,
+           ta.sequence_id, gm.start_min AS gm_start, gm.end_max AS gm_end,
+           gm.type, gl.is_reversed
+    FROM TranscriptAttributes ta
+    INNER JOIN gm ON gm.na_feature_id = ta.na_feature_id
+    INNER JOIN apidb.FeatureLocation gl ON gl.na_feature_id = ta.na_feature_id
+    INNER JOIN dots.NaSequence s ON s.na_sequence_id = gl.na_sequence_id
+    WHERE gl.is_top_level = 1
+  )
+  SELECT source_id, project_id, sequence_id, gm_start, gm_end, type, is_reversed,
+         string_agg(transcript_id, ',' ORDER BY transcript_id) AS transcript_ids
+  FROM seg
+  GROUP BY source_id, project_id, sequence_id,
+           gm_start, gm_end, type, is_reversed
+  ORDER BY CASE WHEN is_reversed = 1 THEN -1 * gm_start ELSE gm_start END
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
new file mode 100644
index 0000000000..89fc3c5b8e
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
@@ -0,0 +1,9 @@
+
+
+  -- index spanning all GeneModelDump columns, led by the gene source_id
+  CREATE INDEX gmd_ix
+    ON GeneModelDump (
+      source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids
+    )
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql b/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql
new file mode 100644
index 0000000000..c691f5b8ce
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql
@@ -0,0 +1,14 @@
+:CREATE_AND_POPULATE
+  -- Distinct species and organism values from GeneAttributes, stored as filter names.
+  CREATE TABLE GeneSummaryFilter AS
+  WITH names AS (
+    SELECT species AS filter_name FROM GeneAttributes
+    UNION
+    SELECT organism AS filter_name FROM GeneAttributes
+  )
+  SELECT CAST(filter_name AS VARCHAR(80)) AS filter_name
+  FROM names
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GeneSummaryFilter_ix.psql b/Model/lib/psql/webtables/MO/GeneSummaryFilter_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
new file mode 100644
index 0000000000..3b5c7cbe91
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
@@ -0,0 +1,26 @@
+:CREATE_AND_POPULATE
+  -- IDs that resolve to a sequence: the sequence's own source_id (for the listed sequence-ontology
+  -- term names) plus any DbRef primary identifier linked to a sequence through DbRefNaSequence.
+  CREATE TABLE GenomicSequenceId AS
+  WITH id_map AS (
+    SELECT ns.source_id AS id, ns.source_id AS sequence
+    FROM dots.NaSequence ns
+    INNER JOIN sres.OntologyTerm oterm ON oterm.ontology_term_id = ns.sequence_ontology_id
+    WHERE oterm.name IN ('random_sequence', 'contig', 'supercontig', 'chromosome','mitochondrial_chromosome',
+                         'plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle','kinetoplast')
+    UNION
+    SELECT dr.primary_identifier AS id, ns.source_id AS sequence
+    FROM dots.NaSequence ns
+    INNER JOIN dots.DbRefNaSequence drnf ON drnf.na_sequence_id = ns.na_sequence_id
+    INNER JOIN sres.DbRef dr ON dr.db_ref_id = drnf.db_ref_id
+    INNER JOIN sres.ExternalDatabaseRelease edr
+            ON edr.external_database_release_id = dr.external_database_release_id
+    INNER JOIN sres.ExternalDatabase ed ON ed.external_database_id = edr.external_database_id
+    WHERE dr.primary_identifier IS NOT NULL
+  )
+  SELECT DISTINCT substr(id, 1, 60) AS id, substr(sequence, 1, 60) AS sequence
+  FROM id_map
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
new file mode 100644
index 0000000000..90bc21c3d6
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
@@ -0,0 +1,21 @@
+
+
+  -- sequence first, then ID
+  CREATE INDEX GenSeqId_sequence_idx
+    ON GenomicSequenceId (sequence, id)
+  ;
+
+
+
+  -- ID first, then sequence
+  CREATE INDEX GenSeqId_id_idx
+    ON GenomicSequenceId (id, sequence)
+  ;
+
+
+
+  -- expression index on the lower-cased ID
+  CREATE INDEX GenSeqId_lowid_idx
+    ON GenomicSequenceId (lower(id), sequence)
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
new file mode 100644
index 0000000000..be8a73a415
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
@@ -0,0 +1,14 @@
+:CREATE_AND_POPULATE
+  -- Sequence text for GenomicSeqAttributes rows; project_id comes from apidb.project_id()
+  -- applied to the scientific name of the sequence's taxon.
+  CREATE TABLE GenomicSequenceSequence AS
+  SELECT sa.source_id,
+         CAST(apidb.project_id(tn.name) AS varchar(20)) AS project_id,
+         ns.sequence
+  FROM GenomicSeqAttributes sa
+  INNER JOIN dots.NaSequence ns ON ns.na_sequence_id = sa.na_sequence_id
+  INNER JOIN sres.TaxonName tn ON tn.taxon_id = ns.taxon_id
+  WHERE tn.name_class = 'scientific name'
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
new file mode 100644
index 0000000000..d68fd68292
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
@@ -0,0 +1,7 @@
+
+
+  -- index on the record key (source_id, project_id)
+  CREATE INDEX GenomicSeq_ix
+    ON GenomicSequenceSequence (source_id, project_id)
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GoTermSummary.psql b/Model/lib/psql/webtables/MO/GoTermSummary.psql
new file mode 100644
index 0000000000..7757364b49
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GoTermSummary.psql
@@ -0,0 +1,56 @@
+:CREATE_AND_POPULATE
+ -- GO term summary per transcript: the direct annotations from GeneGoTerms, UNIONed with rows for the
+ -- GO_RSRC terms each annotated term points to via sres.OntologyRelationship (subject -> object).
+ CREATE TABLE :ORG_ABBREVGoTermSummary AS
+ SELECT ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
+ ggt.taxon_id, ggt.is_not, replace(ggt.go_id, '_', ':') as go_id,
+ ggt.go_term_id, ggt.ontology,
+ replace(ggt.go_term_name, '_',' ') as go_term_name, ggt.source,
+ ggt.evidence_code,
+ CASE ggt.evidence_code WHEN 'IEA' THEN 'Computed' ELSE 'Curated' END as evidence_category, -- only 'IEA' counts as computed
+ ggt.reference, ggt.evidence_code_parameter,
+ ol.min_depth as depth,
+ case
+ when gs.ontology_term_id is null then 0
+ else 1
+ end as is_go_slim
+ FROM :ORG_ABBREVGeneGoTerms ggt
+ LEFT JOIN :ORG_ABBREVOntologyLevels ol ON ggt.go_term_id = ol.ontology_term_id
+ LEFT JOIN (
+ SELECT distinct ontology_term_id
+ FROM apidb.GoSubset
+ WHERE go_subset_term = 'goslim_generic'
+ ) gs ON ggt.go_term_id = gs.ontology_term_id
+ UNION -- second branch: same annotation columns, but go_id / go_term_id / name / depth / slim flag come from the related term (ot)
+ SELECT ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
+ ggt.taxon_id, ggt.is_not,
+ replace (substr(ot.source_id, 1, 25),'_',':') as go_id,
+ ot.ontology_term_id as go_term_id, ggt.ontology,
+ replace (substr(ot.name, 1,250),'_',' ') as go_term_name,
+ ggt.source, ggt.evidence_code,
+ CASE ggt.evidence_code WHEN 'IEA' THEN 'Computed' ELSE 'Curated' END as evidence_category,
+ ggt.reference,
+ ggt.evidence_code_parameter,
+ ol.min_depth as depth,
+ case
+ when gs.ontology_term_id is null then 0
+ else 1
+ end as is_go_slim
+ FROM :ORG_ABBREVGeneGoTerms ggt, sres.OntologyRelationship orel,
+ sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr,
+ sres.OntologyTerm ot -- the LEFT JOINs below attach to ot only (JOIN binds tighter than the comma list)
+ LEFT JOIN :ORG_ABBREVOntologyLevels ol ON ot.ontology_term_id = ol.ontology_term_id
+ LEFT JOIN (
+ SELECT distinct ontology_term_id
+ FROM apidb.GoSubset
+ WHERE go_subset_term = 'goslim_generic'
+ ) gs ON ot.ontology_term_id = gs.ontology_term_id
+ WHERE ggt.go_term_id = orel.subject_term_id
+ AND orel.object_term_id = ot.ontology_term_id
+ AND edr.external_database_release_id = ot.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ AND ed.name ='GO_RSRC'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
new file mode 100644
index 0000000000..55b2bbf76d
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
@@ -0,0 +1,16 @@
+
+
+  -- index led by the protein sequence ID
+  CREATE INDEX GoTermSum_aaSeqId_idx
+    ON :ORG_ABBREVGoTermSummary (aa_sequence_id, go_id, source)
+  ;
+
+
+
+  -- ontology-first composite index
+  CREATE INDEX GoTermSum_plugin_ix
+    ON :ORG_ABBREVGoTermSummary (
+      ontology, gene_source_id, is_not, is_go_slim,
+      go_id, go_term_name, evidence_code, evidence_category)
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/IntronSupportLevel.psql b/Model/lib/psql/webtables/MO/IntronSupportLevel.psql
new file mode 100644
index 0000000000..f9895a117e
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/IntronSupportLevel.psql
@@ -0,0 +1,119 @@
+:CREATE_AND_POPULATE
+ -- Builds the ORG_ABBREV-prefixed IntronSupportLevel table of (gene_source_id, ontology_term, string_value) labels from annotated introns.
+ -- Four UNIONed branches follow. Branch 1 keeps genes whose qualifying-row count equals intron_count ('All') and relabels them 'Any-high'.
+ create table :ORG_ABBREVIntronSupportLevel as
+ SELECT * FROM (
+ SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-high') as string_value
+ FROM (
+ SELECT gene_source_id
+ , ontology_term
+ , case when count(*) = intron_count THEN 'All'
+ when count(*) = 0 THEN 'None'
+ else 'Any' end as string_value
+ FROM (
+ SELECT gij.gene_source_id
+ , 'intron_junction' as ontology_term
+ , intronCount.intron_count
+ FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+ , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
+ WHERE gij.gene_source_id = intronCount.source_id
+ and gij.na_sequence_id = stats.na_sequence_id
+ and gij.annotated_intron = 'Yes'
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= 2 /*stats.perc0005_annot_percent_max*/)
+ ) t
+ GROUP BY gene_source_id, ontology_term, intron_count
+ ) t
+ WHERE string_value = 'All'
+
+ UNION
+ -- Branch 2 applies the same strict filters (2 x max_intron_length, perc01_annot_score, percent_max >= 2) and emits 'All-high' or 'Any-high' per gene.
+ SELECT gene_source_id
+ , ontology_term
+ , case when count(*) = intron_count THEN 'All-high'
+ when count(*) = 0 THEN 'None'
+ else 'Any-high' end as string_value
+ FROM (
+ SELECT gij.gene_source_id
+ , 'intron_junction' as ontology_term
+ , intronCount.intron_count
+ FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+ , (SELECT count (*) as intron_count, source_id FROM apidbtuning.genemodeldump WHERE type = 'Intron' GROUP BY source_id) intronCount
+ WHERE gij.gene_source_id = intronCount.source_id
+ AND gij.na_sequence_id = stats.na_sequence_id
+ AND gij.annotated_intron = 'Yes'
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= 2)
+ ) t2
+ GROUP BY gene_source_id, ontology_term, intron_count
+
+ UNION
+ -- Branch 3 applies the looser filters (4 x max_intron_length, min_annot_score, min_annot_percent_max), skips intron_feature_ids that pass the strict filters, and emits 'All-low' or 'Any-low'.
+ SELECT gene_source_id
+ , ontology_term
+ , CASE WHEN count(*) = intron_count THEN 'All-low'
+ WHEN count(*) = 0 THEN 'None'
+ ELSE 'Any-low' END as string_value
+ FROM (
+ SELECT gij.gene_source_id
+ , 'intron_junction' as ontology_term
+ , intronCount.intron_count
+ FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+ , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
+ WHERE gij.gene_source_id = intronCount.source_id
+ AND gij.na_sequence_id = stats.na_sequence_id
+ AND gij.annotated_intron = 'Yes'
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max)
+ AND gij.intron_feature_id not in (
+ SELECT gij.intron_feature_id
+ FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats
+ WHERE gij.na_sequence_id = stats.na_sequence_id
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= 2)
+ )
+ ) t3
+ GROUP BY gene_source_id, ontology_term, intron_count
+
+ UNION
+ -- Branch 4 repeats the branch 3 filters but keeps only the 'All' genes and relabels them 'Any-low'.
+ SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-low') as string_value
+ FROM (
+ SELECT gene_source_id
+ , ontology_term
+ , case when count(*) = intron_count THEN 'All'
+ when count(*) = 0 THEN 'None'
+ else 'Any' end as string_value
+ FROM (
+ SELECT gij.gene_source_id
+ , 'intron_junction' as ontology_term
+ , intronCount.intron_count
+ FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+ , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
+ WHERE gij.gene_source_id = intronCount.source_id
+ AND gij.na_sequence_id = stats.na_sequence_id
+ AND gij.annotated_intron = 'Yes'
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max)
+ AND gij.intron_feature_id not in (
+ SELECT gij.intron_feature_id
+ FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats
+ WHERE gij.na_sequence_id = stats.na_sequence_id
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= 2)
+ )
+ ) t
+ GROUP BY gene_source_id, ontology_term, intron_count
+ ) t4
+ WHERE string_value = 'All'
+ ) t
+ -- NOTE(review) the count(*) = 0 arms look unreachable, since a group exists only when at least one row passed the filters - confirm.
+ -- NOTE(review) the NOT IN subqueries return no match at all if intron_feature_id can be null - confirm the column is never null.
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/IntronSupportLevel_ix.psql b/Model/lib/psql/webtables/MO/IntronSupportLevel_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords.psql b/Model/lib/psql/webtables/MO/IntronUtrCoords.psql
new file mode 100644
index 0000000000..ac598bbc08
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/IntronUtrCoords.psql
@@ -0,0 +1,35 @@
+:CREATE_AND_POPULATE
+ -- Builds IntronUtrCoords with one row per (na_feature_id, source_id) transcript holding a bracketed, comma-separated list of
+ -- ["feature_type",start,end] triples for its top-level UTR and Intron features, ordered by start and relative to the transcript span.
+ CREATE TABLE IntronUtrCoords AS
+ SELECT na_feature_id, source_id,
+ -- the 'g' flag makes regexp_replace rewrite every match (PostgreSQL rewrites only the first match without it)
+ '[' || regexp_replace(string_agg(text,',' ORDER BY start_min), '.quot;', '"', 'g') || ']' AS gen_rel_intron_utr_coords
+ FROM (
+ SELECT na_feature_id, source_id, start_min,
+ '["' || feature_type || '",' || start_min || ',' || end_max || ']' AS text
+ FROM (
+ SELECT fl.feature_type, tl.na_feature_id, tl.feature_source_id AS source_id,
+ CASE
+ WHEN tl.is_reversed = 1
+ THEN tl.end_max - fl.end_max + 1
+ ELSE fl.start_min - tl.start_min + 1
+ END AS start_min,
+ CASE
+ WHEN tl.is_reversed = 1
+ THEN tl.end_max - fl.start_min + 1
+ ELSE fl.end_max - tl.start_min + 1
+ END AS end_max
+ FROM
+ apidb.TranscriptLocation tl, apidb.FeatureLocation fl
+ WHERE
+ tl.na_feature_id = fl.parent_id
+ AND fl.feature_type in('UTR', 'Intron')
+ AND tl.is_top_level = 1
+ AND fl.is_top_level = 1
+ ) t1
+ ) t2
+ GROUP BY na_feature_id, source_id
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
new file mode 100644
index 0000000000..0d38e419d9
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
@@ -0,0 +1,16 @@
+ -- Index for finding IntronUtrCoords rows by source_id first, then na_feature_id.
+ CREATE INDEX iuc_srcid_ix ON IntronUtrCoords (
+ source_id,
+ na_feature_id
+ )
+
+ ;
+
+ -- Index for finding IntronUtrCoords rows by na_feature_id first, then source_id.
+ CREATE INDEX iuc_nfid_ix ON IntronUtrCoords (
+ na_feature_id,
+ source_id
+ )
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ.psql
new file mode 100644
index 0000000000..edc7d78f8e
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/NameMappingGIJ.psql
@@ -0,0 +1,125 @@
+ -- Working table pairing each junctions protocol-app-node with the expression node that shares its input node and has
+ -- the highest average expression value, plus the sample name, a strand-switch flag and a per-million-reads multiplier.
+ CREATE UNLOGGED TABLE JunExpGIJtmp AS
+ WITH ij AS (
+ SELECT pj.output_pan_id as junctions_pan_id, p.output_pan_id as expression_pan_id, avg(nafe.value) as avg_value,pan.name as exp_name,
+ regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name
+ FROM panio p, panio pj, results.nafeatureexpression nafe, study.protocolappnode pan
+ WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
+ AND pj.input_pan_id = p.input_pan_id
+ AND p.output_pan_id = pan.protocol_app_node_id
+ AND pan.name like '%tpm - unique%'
+ AND p.output_pan_id = nafe.protocol_app_node_id
+ GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
+ ORDER BY pj.output_pan_id
+ ) , stats AS (
+ SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
+ -- 1000000.0 keeps the division non-integer so the multiplier is not truncated when unique_reads is an integer type
+ sum(unique_reads) as total_reads, round(1000000.0/sum(unique_reads),4) as multiplier
+ FROM apidb.IntronJunction
+ WHERE unique_reads >= 1
+ GROUP BY protocol_app_node_id
+ ), part AS (
+ SELECT
+ ij.junctions_pan_id, ij.avg_value, stats.multiplier
+ , max(ij.expression_pan_id) OVER w as max_exp_pan_id
+ , max(ij.sample_name) OVER w as max_sample_Name
+ , max(ij.exp_name) OVER w as max_exp_name
+ FROM ij, stats
+ WHERE ij.junctions_pan_id = stats.protocol_app_node_id
+ WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
+ )
+ SELECT DISTINCT * FROM (
+ SELECT junctions_pan_id
+ , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
+ , first_value(max_sample_name) OVER w1 as sample_name
+ , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
+ , multiplier
+ FROM part
+ WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
+ ) t
+ ORDER BY junctions_pan_id
+ ;
+
+ -- Index on the JunExpGIJtmp working table over (junctions_pan_id, exp_pan_id).
+ CREATE INDEX junexpgijtmp_ix
+ ON JunExpGIJtmp (junctions_pan_id, exp_pan_id)
+
+ ;
+ -- Working table with one row per junctions_pan_id in JunExpGIJtmp, averaging three characteristics of its input nodes
+ -- (average read length, number of mapped reads, average mapping coverage) and counting the rows averaged as num_replicates.
+ -- NOTE(review) read_length and the coverage scaling both subtract 2 from the average read length - the reason is not evident here.
+ CREATE UNLOGGED TABLE MappingStatsGIJtmp (
+ junctions_pan_id,
+ read_length,
+ mapped_reads,
+ avg_mapping_coverage,
+ num_replicates
+ ) AS
+ SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
+ round(avg(number_mapped_reads),1) as mapped_reads,
+ round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
+ as avg_mapping_coverage,
+ count(*) as num_replicates
+ FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
+ cb.value::NUMERIC as number_mapped_reads,
+ cc.value::NUMERIC as avg_mapping_coverage
+ FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
+ PANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb,
+ STUDY.CHARACTERISTIC cc, sres.ontologyterm otc
+ WHERE je.junctions_pan_id = ioa.output_pan_id
+ AND ioa.input_pan_id = ca.protocol_app_node_id
+ AND ca.value is not null
+ AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID
+ AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length'
+ AND ca.protocol_app_node_id = cb.protocol_app_node_id
+ AND cb.value is not null
+ AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID
+ AND otb.source_id IN ('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads'
+ AND ca.protocol_app_node_id = cc.protocol_app_node_id
+ AND cc.value is not null
+ AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID
+ AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage'
+ ) t
+ GROUP by Junctions_Pan_Id
+
+ ;
+
+ -- Index spanning every column of the MappingStatsGIJtmp working table, led by junctions_pan_id.
+ CREATE INDEX mpstats_pk_ix
+ ON MappingStatsGIJtmp (
+ junctions_pan_id,read_length,mapped_reads,avg_mapping_coverage,num_replicates
+ )
+
+ ;
+
+:CREATE_AND_POPULATE
+ -- NameMappingGIJ pairs each junctions node in JunExpGIJtmp with its dataset presenter names, three presenter properties and
+ -- the MappingStatsGIJtmp figures, keeping presenters whose showIntronJunctions or includeInUnifiedJunctions value is 'true'.
+ CREATE TABLE NameMappingGIJ AS
+ SELECT DISTINCT
+ edp.dataset_presenter_display_name AS exp_name,
+ edp.external_database_name,
+ je.sample_name, je.junctions_pan_id, je.exp_pan_id,
+ substr(dp.value, 1, 4000) AS presenter_switch_strands,
+ substr(sj.value, 1, 4000) AS show_intron_junctions,
+ substr(uj.value, 1, 4000) AS include_unified_junctions,
+ ms.read_length, ms.mapped_reads, ms.avg_mapping_coverage, ms.num_replicates,
+ je.switch_strands, je.multiplier
+ FROM junexpgijtmp je
+ INNER JOIN mappingstatsgijtmp ms ON ms.junctions_pan_id = je.junctions_pan_id
+ INNER JOIN study.nodeNodeSet sl ON sl.protocol_app_node_id = je.junctions_pan_id
+ INNER JOIN study.NodeSet s ON s.node_set_id = sl.node_set_id
+ INNER JOIN ExternalDbDatasetPresenter edp ON edp.external_database_release_id = s.external_database_release_id
+ INNER JOIN DatasetProperty dp ON dp.dataset_presenter_id = edp.dataset_presenter_id
+ INNER JOIN DatasetProperty sj ON sj.dataset_presenter_id = edp.dataset_presenter_id
+ INNER JOIN DatasetProperty uj ON uj.dataset_presenter_id = edp.dataset_presenter_id
+ WHERE s.node_type like 'junctions'
+ AND dp.property = 'switchStrandsProfiles'
+ AND sj.property = 'showIntronJunctions'
+ AND uj.property = 'includeInUnifiedJunctions'
+ AND (substr(sj.value, 1, 10) = 'true' or substr(uj.value, 1, 10) = 'true')
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
new file mode 100644
index 0000000000..025dd1f8b3
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
@@ -0,0 +1,6 @@
+ -- Index on NameMappingGIJ over (junctions_pan_id, exp_pan_id).
+ CREATE INDEX namemappinggij_ix
+ ON NameMappingGIJ (junctions_pan_id, exp_pan_id)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
new file mode 100644
index 0000000000..56714b16e6
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
@@ -0,0 +1,15 @@
+:CREATE_AND_POPULATE
+ -- OrganismAbbreviation maps each organism's scientific name (from sres.TaxonName) to the
+ -- name_for_filenames abbreviation stored for the same taxon_id in apidb.Organism.
+ CREATE TABLE OrganismAbbreviation AS
+ SELECT
+ tn.name AS organism,
+ o.name_for_filenames AS abbreviation
+ FROM apidb.Organism o
+ INNER JOIN sres.TaxonName tn
+ ON tn.taxon_id = o.taxon_id
+ WHERE tn.name_class = 'scientific name'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
new file mode 100644
index 0000000000..f04888b75c
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
@@ -0,0 +1,42 @@
+:CREATE_AND_POPULATE
+ -- Builds OrganismAbbreviationBlast as the UNION of three sources - rows copied from OrganismAbbreviationWS, one 'Popset/Genbank Isolates'
+ -- row per organism family with a matching PopsetAttributes external_db_name, and species rows for organisms named in EstAttributes plus one fixed Cryptosporidiidae row.
+ CREATE TABLE OrganismAbbreviationBlast as
+ SELECT organism, parent, abbreviation, substr(project_id, 1, 20) as project_id
+ FROM OrganismAbbreviationWS
+ UNION
+ -- all families for popsets
+ SELECT DISTINCT family_name_for_files || ' Popset/Genbank Isolates' as organism, '' as parent,
+ family_name_for_files as abbreviation, substr(project_name, 1, 20) as project_id
+ FROM apidb.Organism
+ WHERE family_name_for_files is not null
+ AND abbrev || '_isolates_genbank_RSRC' IN (SELECT external_db_name as db_name FROM PopsetAttributes)
+ AND family_name_for_files NOT IN ('Culicosporidae', 'Dubosqiidae', 'Ordosporidae')
+ UNION
+ SELECT special.organism, special.parent, special.abbreviation,
+ substr(ot.project_id, 1, 20) as project_id
+ FROM OrganismTree ot,
+ ( -- all species and speciesAbbreviations from apidb.Organism where we have ests
+ SELECT DISTINCT
+ sp.name as organism, ot.parentTerm as parent,
+ regexp_replace(org.name_for_filenames, replace(org.strain_abbrev, '/','_'),'') as abbreviation
+ FROM sres.TaxonName sp, TaxonSpecies ts, apidb.Organism org, OrganismTree ot
+ WHERE org.taxon_id = ts.taxon_id
+ AND ts.species_taxon_id = sp.taxon_id
+ AND sp.name_class = 'scientific name'
+ AND ot.term = sp.name
+ AND org.strain_abbrev is not null
+ AND org.name_for_filenames is not null
+ AND sp.taxon_id
+ in (SELECT etn.taxon_id
+ FROM sres.TaxonName etn
+ WHERE etn.name in (SELECT organism FROM EstAttributes))
+ UNION
+ SELECT 'Cryptosporidiidae SSU_18srRNA Reference Isolates' as organism,
+ 'Cryptosporidium' as parent, 'CryptosporidiidaeReference' as abbreviation
+ ) special
+ WHERE special.parent = ot.term
+ -- NOTE(review) strain_abbrev is used as a regular expression above, and regexp_replace without the 'g' flag removes only its first match - confirm both are intended.
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast_ix.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation_ix.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviation_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
new file mode 100644
index 0000000000..5414446561
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
@@ -0,0 +1,327 @@
+ -- Working table with one row per taxon_id and a 1-or-null flag column for each data kind
+ -- (organellar, Epitope, Array, HTSIsolate, Popset), pivoted from the UNIONed subquery below.
+ CREATE UNLOGGED TABLE DataSourceCount AS
+ SELECT
+ taxon_id,
+ max(CASE WHEN stype = 'organellar' THEN num ELSE null END) as organellar_has,
+ max(CASE WHEN stype = 'Epitope' THEN num ELSE null END) as Epitope_has,
+ max(CASE WHEN stype = 'Array' THEN num ELSE null END) as Array_has,
+ max(CASE WHEN stype = 'HTSIsolate' THEN num ELSE null END) as HTSIsolate_has,
+ max(CASE WHEN stype = 'Popset' THEN num ELSE null END) as Popset_has
+ FROM (
+ SELECT DISTINCT enas.taxon_id, 'organellar' AS stype, 1 AS num
+ FROM dots.externalNAsequence enas, SRES.ontologyterm ot
+ WHERE enas.sequence_ontology_id = ot.ontology_term_id
+ AND ot.name in( 'mitochondrial_chromosome','apicoplast_chromosome')
+ GROUP BY enas.taxon_id
+ UNION
+ SELECT distinct ds.taxon_id, 'HTSIsolate' AS stype, 1 AS num
+ FROM apidb.DataSource ds
+ WHERE ds.type = 'isolates' AND ds.subtype = 'HTS_SNP'
+ GROUP BY taxon_id
+ UNION
+ SELECT distinct ds.taxon_id, 'Popset' AS stype, 1 AS num
+ FROM apidb.DataSource ds
+ WHERE ds.subtype = 'sequenceing_types'
+ GROUP BY taxon_id
+ UNION
+ SELECT distinct ds.taxon_id, 'Epitope' AS stype, 1 AS num
+ FROM apidb.DataSource ds
+ WHERE ds.type = 'epitope'
+ GROUP BY taxon_id
+ UNION
+ SELECT distinct ds.taxon_id, 'Array' AS stype, 1 AS num
+ FROM apidb.DataSource ds
+ WHERE ds.type = 'transcript_expression'
+ AND ds.subtype = 'array'
+ GROUP BY taxon_id
+ ) t
+ GROUP BY taxon_id
+ -- NOTE(review) the subtype literal 'sequenceing_types' looks misspelled - confirm it matches the values stored in apidb.DataSource.
+ ;
+
+ -- Working table flagging taxa that have at least one DOTS.MISCELLANEOUS feature whose sequence
+ -- ontology term is named 'centromere' (hasCentromere is 1 on every row this query produces).
+ CREATE UNLOGGED TABLE OrganismCentromere AS
+ SELECT DISTINCT
+ seq.taxon_id,
+ CASE WHEN count(*) > 0 THEN 1 ELSE 0 END AS hasCentromere
+ FROM DOTS.MISCELLANEOUS feat
+ INNER JOIN sres.ontologyTerm term
+ ON term.ontology_term_id = feat.sequence_ontology_id
+ INNER JOIN dots.nasequence seq
+ ON seq.na_sequence_id = feat.na_sequence_id
+ WHERE term.name = 'centromere'
+ GROUP BY seq.taxon_id
+ ;
+
+ -- Working table with one row per taxon_id giving the number of top-level contig, supercontig
+ -- and chromosome sequences in GenomicSeqAttributes (null when a taxon has none of that type).
+ CREATE UNLOGGED TABLE SequenceCount AS
+ WITH per_type AS (
+ SELECT taxon_id, sequence_type, count(*) AS num
+ FROM GenomicSeqAttributes
+ WHERE is_top_level = 1
+ GROUP BY taxon_id, sequence_type
+ )
+ SELECT
+ taxon_id,
+ max(CASE sequence_type WHEN 'contig' THEN num END) AS contig_num,
+ max(CASE sequence_type WHEN 'supercontig' THEN num END) AS supercont_num,
+ max(CASE sequence_type WHEN 'chromosome' THEN num END) AS chrom_num
+ FROM per_type
+ GROUP BY taxon_id
+ ;
+
+ -- Working table of per-taxon community comment counts. The real count is currently disabled (see the
+ -- commented-out filter below), so every taxon_id present in GeneAttributes gets communityCount 0.
+ CREATE UNLOGGED TABLE CommunityCount AS
+ -- SELECT taxon_id, count(*) as communityCount
+ -- TODO: add this back
+ select taxon_id, 0 as communityCount
+ FROM GeneAttributes
+ --WHERE
+ --(source_id, project_id) IN (
+ --SELECT distinct stable_id, project_name
+ --userlogins5.mappedComment@prodn.login_comment
+ --FROM userlogins5.mappedComment
+ --WHERE is_visible = 1
+ --AND comment_target_id = 'gene'
+ --)
+ GROUP BY taxon_id
+
+ ;
+ -- Working table with one row per GeneAttributes taxon_id counting the distinct source_ids of non-deprecated genes that
+ -- have rt_pcr, rnaseq or array transcript_expression rows in Profile. Rows that do not qualify map to NULL rather than ''
+ -- because PostgreSQL counts an empty string as a distinct value, which inflated each count by one.
+ CREATE UNLOGGED TABLE ProfileCount AS
+ SELECT ga.taxon_id,
+ count(distinct(case when p.dataset_type = 'transcript_expression'
+ and p.dataset_subtype like '%rt_pcr%'
+ and ga.is_deprecated = 0
+ then p.source_id
+ else NULL
+ end)) as rtPCRCount,
+ count(distinct(case when p.dataset_type = 'transcript_expression'
+ and p.dataset_subtype = 'rnaseq'
+ and ga.is_deprecated = 0
+ then p.source_id
+ else NULL
+ end)) as rnaSeqCount,
+ count(distinct(case when p.dataset_type = 'transcript_expression'
+ and p.dataset_subtype = 'array'
+ and ga.is_deprecated = 0
+ then p.source_id
+ else NULL
+ end)) as geneArrayCount
+ FROM Profile p
+ RIGHT OUTER JOIN GeneAttributes ga ON ga.source_id = p.source_id
+ GROUP BY ga.taxon_id
+
+ ;
+ -- Working table counting, per taxon_id, the distinct PopsetAttributes source_ids that have a similarity hit
+ -- (pvalue_exp <= -10) to a sequence of that taxon and also a hit overlapping a GeneAttributes gene on the same sequence.
+ -- Taxa whose hits overlap no gene still get a row, with popsetCount 0, because of the LEFT JOIN.
+ CREATE UNLOGGED TABLE PopsetCount AS
+ SELECT count(distinct gene.source_id) as popsetCount, sim.taxon_id
+ FROM (
+ (SELECT i.source_id, nas.taxon_id, nas.source_id as sequence_source_id
+ FROM dots.similarity s, PopsetAttributes i,
+ core.tableinfo t, dots.nasequence nas
+ WHERE s.query_id = i.na_sequence_id
+ AND nas.na_sequence_id = s.subject_id
+ AND t.table_id = s.subject_table_id
+ AND t.table_id = s.query_table_id
+ AND t.name = 'ExternalNASequence'
+ AND s.pvalue_exp <= -10
+ ) sim LEFT JOIN
+ (SELECT i.source_id, seq.source_id as sequence_id
+ FROM dots.similarity s, PopsetAttributes i, GeneAttributes g,
+ core.tableinfo t, dots.nasequence seq
+ WHERE s.query_id = i.na_sequence_id
+ AND s.subject_id = g.na_sequence_id
+ AND t.table_id = s.subject_table_id
+ AND t.table_id = s.query_table_id
+ AND s.min_subject_start <= g.end_max
+ AND s.max_subject_end >= g.start_min
+ AND g.na_sequence_id = seq.na_sequence_id
+ AND t.name = 'ExternalNASequence'
+ ) gene
+ ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id)
+ GROUP BY sim.taxon_id
+
+ ;
+ -- Working table of per-taxon genome statistics (one row per project_id, taxon_id and ncbi_tax_id among the top-level
+ -- GenomicSeqAttributes sequences) with counts of distinct genes having each kind of evidence. Taxa without genes are kept by the RIGHT JOIN.
+ -- CASE arms yield NULL rather than '' for excluded genes, since PostgreSQL would count '' as one extra distinct value.
+ CREATE UNLOGGED TABLE GeneCount AS
+ SELECT genomestat.taxon_id,
+ genomestat.project_id,
+ genomestat.database_version,
+ genomestat.ncbi_tax_id,
+ genomestat.Megabps,
+ coalesce(snpCount.ct,0) as snpCount,
+ coalesce(count(distinct ga.source_id),0) as geneCount,
+ coalesce(count(distinct case when ga.is_pseudo =1 then ga.source_id else NULL end),0) as pseudoGeneCount,
+ coalesce(count(distinct case when (ga.gene_type ='protein coding' or ga.gene_type ='protein coding gene') then ga.source_id else NULL end),0) as codingGeneCount,
+ coalesce(count(distinct case when (ga.gene_type ='protein coding' or ga.gene_type ='protein coding gene') then NULL else ga.source_id end),0) as otherGeneCount,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then cct.gene_source_id
+ else NULL
+ end)),0) ChipChipGeneCount ,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then pp.source_id
+ else NULL
+ end)),0) orthologCount,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then gts.gene_source_id
+ else NULL
+ end)),0) goCount,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then tfbs.gene_source_id
+ else NULL
+ end)),0) tfbsCount,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then mss.aa_sequence_id
+ else NULL
+ end)),0) proteomicsCount,
+ coalesce(count (distinct (case when ga.is_deprecated = 0
+ then est.source_id
+ else NULL
+ end)),0) estCount,
+ coalesce(count (distinct (case when (ga.is_deprecated = 0 and ta.ec_numbers is not null)
+ then ga.source_id
+ else NULL
+ end)),0) ecNumberCount
+ FROM GeneAttributes ga
+ LEFT OUTER JOIN apidb.phylogeneticprofile pp on ga.source_id = pp.source_id
+ LEFT OUTER JOIN gotermsummary gts on ga.source_id = gts.gene_source_id
+ LEFT OUTER JOIN TFBSGene tfbs on ga.source_id = tfbs.gene_source_id
+ LEFT OUTER JOIN TranscriptAttributes ta on ta.gene_source_id = ga.source_id
+ LEFT OUTER JOIN apidb.MassSpecSummary mss on ta.aa_sequence_id = mss.aa_sequence_id
+ LEFT OUTER JOIN chipchipTranscript cct on ga.source_id = cct.gene_source_id
+ LEFT OUTER JOIN (
+ SELECT distinct s.gene as source_id
+ FROM EstAlignmentGeneSummary s, EstAttributes e
+ WHERE s.est_gene_overlap_length >= 100
+ AND s.is_best_alignment in (1)
+ AND s.percent_est_bases_aligned >= 20
+ AND s.percent_identity >= 90
+ AND e.best_alignment_count <= 1
+ AND e.source_id = s.accession
+ GROUP by s.gene HAVING count(*) >= 1
+ ) est ON ga.source_id = est.source_id
+ RIGHT OUTER JOIN (
+ SELECT project_id, taxon_id,
+ max(database_version) as database_version,
+ CASE WHEN ncbi_tax_id > 9000000000 THEN NULL
+ ELSE ncbi_tax_id
+ END ncbi_tax_id,
+ to_char(sum(length)/1000000,'9999.99') as megabps
+ FROM GenomicSeqAttributes
+ WHERE is_top_level = 1
+ GROUP BY project_ID, taxon_id, ncbi_tax_id
+ ) genomestat ON genomestat.taxon_id = ga.taxon_id
+ LEFT OUTER JOIN (
+ SELECT count(distinct ga.source_id) as ct, ga.taxon_id
+ FROM GeneAttributes ga, SnpAttributes sf
+ WHERE sf.gene_source_id = ga.source_id
+ AND ga.is_deprecated = 0
+ GROUP BY ga.taxon_id
+ ) snpCount ON ga.taxon_id = snpCount.taxon_id
+ GROUP BY genomestat.taxon_id,
+ genomestat.project_id,
+ genomestat.database_version,
+ genomestat.ncbi_tax_id,
+ genomestat.Megabps,
+ snpCount.ct
+
+ ;
+
+:CREATE_AND_POPULATE
+ -- OrganismAttributes combines each apidb.Organism row and its scientific name with the counts and flags LEFT JOINed from the working tables
+ -- (DataSourceCount, OrganismCentromere, SequenceCount, CommunityCount, GeneCount, PopsetCount, ProfileCount), then adds species name, species NCBI taxon id and strain.
+ CREATE TABLE OrganismAttributes AS
+ SELECT oa.*, tn2.name as species, t.ncbi_tax_id as species_ncbi_tax_id
+ , CASE WHEN ltrim(replace(oa.organism_name, tn2.name, ''))= oa.organism_name
+ THEN strain_abbrev
+ ELSE ltrim(replace(oa.organism_name, tn2.name, '')) END AS strain
+ FROM (
+ SELECT o.project_name as project_id,
+ case when t.ncbi_tax_id > 10000000
+ -- then 'TMPTX_' || round(t.ncbi_tax_id / 10000000) || '_' ||
+ -- mod(t.ncbi_tax_id, 10000000) -- e.g. "TMPTX_930_1"
+ -- then 'TMPTX_' || t.ncbi_tax_id -- all the many digits
+ then 'TMPTX_' || o.public_abbrev
+ else 'NCBITAXON_' || t.ncbi_tax_id
+ end as source_id,
+ o.abbrev as internal_abbrev,
+ o.public_abbrev,
+ o.orthomcl_abbrev,
+ o.family_name_for_files,
+ tn.name as organism_name,
+ o.genome_source,
+ o.strain_abbrev,
+ o.is_annotated_genome,
+ o.is_reference_strain,
+ o.is_family_representative,
+ o.name_for_filenames,
+ o.taxon_id as component_taxon_id,
+ gc.database_version,
+ gc.megabps as megabps,
+ gc.ncbi_tax_id as ncbi_tax_id,
+ gc.snpCount as snpCount,
+ gc.geneCount as geneCount,
+ gc.pseudoGeneCount as pseudoGeneCount,
+ gc.codingGeneCount as codingGeneCount,
+ gc.otherGeneCount as otherGeneCount,
+ gc.ChipChipGeneCount as ChipChipGeneCount,
+ gc.orthologCount as orthologCount,
+ gc.goCount as goCount,
+ gc.tfbsCount as tfbsCount,
+ gc.proteomicsCount as proteomicsCount,
+ gc.estCount as estCount,
+ gc.ecNumberCount as ecNumberCount,
+ cast(coalesce(dsc.Organellar_Has, 0) as NUMERIC(1)) as isOrganellar,
+ cast(coalesce(dsc.HTSIsolate_Has, 0) as NUMERIC(1)) as hasHTSIsolate,
+ cast(coalesce(dsc.Popset_Has, 0) as NUMERIC(1)) as hasPopset,
+ cast(coalesce(dsc.Epitope_Has, 0) as NUMERIC(1)) as hasEpitope,
+ cast(coalesce(dsc.Array_Has, 0) as NUMERIC(1)) as hasArray,
+ coalesce(oc.hasCentromere, 0) as hasCentromere,
+ coalesce(sc.contig_num, 0) as contigCount,
+ coalesce(sc.supercont_num, 0) as supercontigCount,
+ coalesce(sc.chrom_num, 0) as chromosomeCount,
+ coalesce(cc.communityCount, 0) as communityCount,
+ coalesce(psc.popsetCount, 0) as popsetCount,
+ coalesce(pc.geneArrayCount, 0) as arrayGeneCount,
+ coalesce(pc.rnaSeqCount, 0) as rnaSeqCount,
+ coalesce(pc.rtPCRCount, 0) as rtPCRCount,
+ coalesce(ta.avg_transcript_length, 0) as avg_transcript_length
+ FROM apidb.Organism o
+ INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id
+ INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id
+ LEFT JOIN DataSourceCount dsc ON o.taxon_id = dsc.taxon_id
+ LEFT JOIN OrganismCentromere oc ON o.taxon_id = oc.taxon_id
+ LEFT JOIN SequenceCount sc ON o.taxon_id = sc.taxon_id
+ LEFT JOIN CommunityCount cc ON o.taxon_id = cc.taxon_id
+ LEFT JOIN GeneCount gc ON o.taxon_id = gc.taxon_id
+ LEFT JOIN popsetCount psc ON o.taxon_id = psc.taxon_id
+ LEFT JOIN profileCount pc ON o.taxon_id = pc.taxon_id
+ LEFT JOIN (
+ SELECT taxon_id, round(avg(length),1) as avg_transcript_length
+ FROM TranscriptAttributes
+ GROUP by taxon_id
+ ) ta ON o.taxon_id = ta.taxon_id
+ WHERE tn.name_class = 'scientific name'
+ ) oa,
+ TaxonSpecies ts,
+ sres.taxon t,
+ sres.taxonname tn2
+ WHERE oa.component_taxon_id = ts.taxon_id
+ AND ts.species_taxon_id = t.taxon_id
+ AND ts.species_taxon_id = tn2.taxon_id
+ AND tn2.name_class = 'scientific name'
+ -- strain is the organism name with the species name removed, or strain_abbrev when the organism name does not contain the species name.
+ -- source_id is 'TMPTX_' plus public_abbrev when ncbi_tax_id exceeds 10000000, otherwise 'NCBITAXON_' plus ncbi_tax_id.
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
new file mode 100644
index 0000000000..b40126a71e
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
@@ -0,0 +1,7 @@
+ -- Unique index, so no two OrganismAttributes rows may share a source_id.
+ CREATE UNIQUE INDEX Organism_sourceId_idx
+ ON OrganismAttributes (source_id)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
new file mode 100644
index 0000000000..326c177a8a
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
@@ -0,0 +1,49 @@
+:CREATE_AND_POPULATE
+ -- OrganismSelectTaxonRank lists each annotated-genome organism with the scientific names of its phylum, genus, species, kingdom and class
+ -- ancestors ('N/A' when none is found), located by walking sres.taxon parent links recursively. A 'no rank' taxon named Oomycetes is treated as the class.
+ CREATE TABLE OrganismSelectTaxonRank AS
+ WITH organism_rank AS (
+ SELECT tn1.name as organism, o.public_abbrev, tn2.name as parent_organism,
+ case when tn2.name = 'Oomycetes' then 'class' else r.rank end as rank
+ FROM (
+ WITH RECURSIVE cte AS(
+ SELECT taxon_id input, taxon_id, rank, parent_id
+ FROM sres.taxon
+ WHERE taxon_id IN (SELECT taxon_id FROM apidb.organism WHERE is_annotated_genome = 1)
+ UNION
+ SELECT cte.input, t.taxon_id, t.rank, t.parent_id
+ FROM sres.taxon t, cte
+ WHERE cte.parent_id = t.taxon_id
+ )
+ SELECT input, taxon_id, rank
+ FROM cte
+ ) r
+ , sres.taxonname tn1
+ , sres.taxonname tn2
+ , apidb.organism o
+ WHERE r.input = tn1.taxon_id
+ AND r.taxon_id = tn2.taxon_id
+ AND tn1.name_class = 'scientific name'
+ AND tn2.name_class = 'scientific name'
+ AND (r.rank in ('phylum', 'genus', 'species', 'kingdom', 'class') or (r.rank = 'no rank' and tn2.name = 'Oomycetes'))
+ AND tn1.taxon_id = o.taxon_id
+ )
+ SELECT
+ organisms.organism
+ , organisms.public_abbrev
+ , coalesce(phylum.parent_organism, 'N/A') as phylum
+ , coalesce(genus.parent_organism, 'N/A') as genus
+ , coalesce(species.parent_organism, 'N/A') as species
+ , coalesce(kingdom.parent_organism, 'N/A') as kingdom
+ , coalesce(class.parent_organism, 'N/A') as class
+ FROM
+ (select distinct organism, public_abbrev from organism_rank) organisms
+ LEFT JOIN ( select * from organism_rank where rank= 'phylum') phylum ON organisms.organism = phylum.organism
+ LEFT JOIN ( select * from organism_rank where rank= 'genus') genus ON organisms.organism = genus.organism
+ LEFT JOIN ( select * from organism_rank where rank= 'species') species ON organisms.organism = species.organism
+ LEFT JOIN ( select * from organism_rank where rank= 'kingdom') kingdom ON organisms.organism = kingdom.organism
+ LEFT JOIN ( select * from organism_rank where rank= 'class') class ON organisms.organism = class.organism
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank_ix.psql b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/PathwayNodeGene.psql b/Model/lib/psql/webtables/MO/PathwayNodeGene.psql
new file mode 100644
index 0000000000..f12202677e
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/PathwayNodeGene.psql
@@ -0,0 +1,14 @@
+:CREATE_AND_POPULATE
+ -- PathwayNodeGene links pathway nodes to genes - a gene matches a node of the same pathway when its
+ -- ec_number_gene fits the node display_label read as a LIKE pattern with each '-' turned into '%'.
+ CREATE TABLE PathwayNodeGene AS
+ SELECT DISTINCT
+ node.pathway_node_id,
+ tpath.gene_source_id
+ FROM transcriptpathway tpath
+ INNER JOIN sres.pathwaynode node
+ ON node.pathway_id = tpath.pathway_id
+ AND tpath.ec_number_gene LIKE replace(node.display_label, '-', '%')
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/PathwayNodeGene_ix.psql b/Model/lib/psql/webtables/MO/PathwayNodeGene_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable.psql b/Model/lib/psql/webtables/MO/PathwaysGeneTable.psql
new file mode 100644
index 0000000000..e792016369
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/PathwaysGeneTable.psql
@@ -0,0 +1,44 @@
+:CREATE_AND_POPULATE
+ -- PathwaysGeneTable has one row per gene, pathway and enzyme, giving the count of distinct reaction_source_id values for that enzyme in the pathway
+ -- and a Yes/No exact_match flag taken from the largest tp.exact_match. Reactions with enzyme '-.-.-.-' are left out.
+ CREATE TABLE PathwaysGeneTable AS
+ SELECT * FROM (
+ SELECT DISTINCT gene_source_id
+ , project_id
+ , pathway_source_id
+ , pathway_name
+ , count(reaction_source_id) as reactions
+ , enzyme
+ , expasy_url
+ , pathway_source
+ , exact_match
+ FROM (
+ SELECT DISTINCT tp.gene_source_id
+ , tp.project_id
+ , tp.pathway_source_id
+ , tp.pathway_name
+ , pr.reaction_source_id
+ , pr.enzyme
+ , pr.expasy_url
+ , tp.pathway_source
+ , CASE max(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match
+ FROM TranscriptPathway tp
+ , PathwayAttributes pa
+ , PathwayCompounds pc
+ , PathwayReactions pr
+ WHERE tp.pathway_id = pa.pathway_id
+ AND pc.pathway_id = pa.pathway_id
+ AND pr.reaction_id = pc.reaction_id
+ AND pr.ext_db_name = pc.ext_db_name
+ AND tp.ec_number_pathway = pr.enzyme
+ AND tp.wildcard_count_gene <= tp.wildcard_count_pathway
+ AND pr.enzyme != '-.-.-.-'
+ GROUP BY tp.gene_source_id, tp.project_id, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
+ ) t
+ GROUP BY gene_source_id, project_id, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match
+ ) t2
+ ORDER BY pathway_source, lower(pathway_name)
+ -- NOTE(review) the ORDER BY above can only influence insertion order, since readers of the new table get no ordering guarantee - confirm it is needed.
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
new file mode 100644
index 0000000000..776a8f5ba5
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
@@ -0,0 +1,9 @@
+
+
+    -- Composite index over the nine PathwaysGeneTable columns listed below, led by gene_source_id.
+    CREATE INDEX pgt_ix ON PathwaysGeneTable (
+        gene_source_id, project_id, pathway_source_id, pathway_name,
+        reactions, enzyme, expasy_url, pathway_source, exact_match
+    )
+    ;
+
diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity.psql b/Model/lib/psql/webtables/MO/PdbSimilarity.psql
new file mode 100644
index 0000000000..2818d01271
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/PdbSimilarity.psql
@@ -0,0 +1,32 @@
+:CREATE_AND_POPULATE
+    -- PdbSimilarity: one row per similarity hit between a transcript's protein and a PDB chain,
+    -- with the chain's title, derived PDB id, e-value parts, identity, coverage and taxon name.
+    CREATE TABLE PdbSimilarity AS
+    SELECT ta.source_id, eas.source_id AS pdb_chain,
+           substr(eas.description, 1, 100) AS pdb_title,
+           -- pdb_id: pdb_chain up to (not including) its last '_'; '' when it has no '_'
+           substr(eas.source_id
+                  , 1
+                  , length(eas.source_id) - (
+                        CASE strpos(reverse(eas.source_id), '_')
+                          WHEN 0 THEN length(eas.source_id)
+                          ELSE strpos(reverse(eas.source_id), '_') END
+                    )
+           ) AS pdb_id,
+           s.evalue_mant, s.evalue_exp,
+           s.pident AS percent_identity,
+           -- ::numeric keeps the ratio fractional even if both lengths are integer-typed
+           ROUND((s.length::numeric / ta.protein_length) * 100) AS percent_plasmo_coverage,
+           SUBSTR(tn.name, 1, 100) AS taxon,
+           eas.taxon_id AS pdb_taxon_id, ta.taxon_id AS gene_taxon_id
+    FROM apidb.PdbSimilarity s
+    INNER JOIN TranscriptAttributes ta ON ta.aa_sequence_id = s.aa_sequence_id
+    -- NOTE(review): joins on the PDB entry key instead of s.pident (the identity score); confirm column name
+    INNER JOIN apiDB.ProteinDataBank eas ON eas.protein_data_bank_id = s.protein_data_bank_id
+    INNER JOIN sres.TaxonName tn ON tn.taxon_id = eas.taxon_id
+                                AND tn.name_class = 'scientific name'
+    ORDER BY ta.source_id, eas.source_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity_ix.psql b/Model/lib/psql/webtables/MO/PdbSimilarity_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/Profile.psql b/Model/lib/psql/webtables/MO/Profile.psql
new file mode 100644
index 0000000000..7db9b02f91
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/Profile.psql
@@ -0,0 +1,800 @@
+:CREATE_AND_POPULATE
+    -- Profile: empty table definition only; its rows are presumably loaded by the
+    -- DO block that follows this statement (TODO confirm against the insert there).
+    CREATE TABLE Profile (
+        dataset_name        VARCHAR(200),
+        dataset_type        VARCHAR(50),
+        dataset_subtype     VARCHAR(50),
+        profile_type        VARCHAR(30),
+        node_type           VARCHAR(100),
+        source_id           VARCHAR(500),
+        profile_study_id    NUMERIC(7),
+        profile_set_name    VARCHAR(400),
+        profile_set_suffix  VARCHAR(50),
+        profile_as_string   VARCHAR(4000),
+        max_value           NUMERIC,
+        min_value           NUMERIC,
+        max_timepoint       VARCHAR(200),
+        min_timepoint       VARCHAR(200)
+    )
+
+
+:DECLARE_PARTITION;
+
+
+
+ DO $$
+ DECLARE
+ ctrows NUMERIC := 0;
+ commit_after NUMERIC := 10000;
+ pf_rows record;
+ BEGIN
+ FOR pf_rows IN (
+ SELECT
+ ds.name as dataset_name, ds.type as dataset_type,
+ ds.subtype as dataset_subtype, profile.profile_type, profile.node_type,
+ profile.source_id, profile.node_set_id as profile_study_id,
+ ps.name as profile_set_name,
+ cast(case
+ when regexp_replace(ps.name, '\[.+\]', '') like '% - %'
+ then regexp_replace(regexp_replace(ps.name, ' *\[.+\]', ''), '.+ - ', '')
+ -- special cases for legacy datasets
+ when regexp_replace(ps.name, '\[.+\]', '') like 'DeRisi%'
+ then regexp_replace(regexp_replace(ps.name, '\[.+\]', ''), 'DeRisi ', '')
+ when regexp_replace(ps.name, '\[.+\]', '') like 'winzeler_cc_%'
+ then regexp_replace(regexp_replace(ps.name, '\[.+\]', ''), 'winzeler_cc_', '')
+ when regexp_replace(ps.name, '\[.+\]', '') like 'Llinas RT transcription and decay %'
+ then regexp_replace(regexp_replace(ps.name, '\[.+\]', ''), 'Llinas RT transcription and decay ', '')
+ when regexp_replace(ps.name, '\[.+\]', '') like 'T.brucei paired end RNA-Seqdata from Horn%'
+ then regexp_replace(
+ regexp_replace(
+ regexp_replace(
+ regexp_replace(
+ regexp_replace(ps.name, '\[.+ nonunique\]', ''),
+ '\[.+ unique\]', ' - unique'),
+ '\[.+\]', ''),
+ 'aligned with cds coordinates ', 'cds coordinates'),
+ 'T.brucei paired end RNA-Seqdata from Horn ', '')
+ else null
+ end as varchar(50)
+ ) as profile_set_suffix,
+ CASE WHEN replace(profile.profile_as_string, 'NA' || CHR(9), '') = 'NA' THEN null ELSE profile.profile_as_string END as profile_as_string,
+ profile.max_value, profile.min_value,
+ profile.max_timepoint, profile.min_timepoint
+ FROM apidb.DataSource ds, sres.ExternalDatabase d,
+ sres.ExternalDatabaseRelease r, study.NodeSet ps,
+ (
+ -- gene profiles
+ SELECT gene_profile.node_set_id, ga.source_id, gene_profile.profile_type, gene_profile.node_type,
+ gene_profile.profile_as_string, gene_profile.max_value, gene_profile.min_value,
+ gene_profile.max_timepoint, gene_profile.min_timepoint
+ FROM GeneAttributes ga,
+ (
+ SELECT * FROM (
+ WITH result AS (
+ SELECT na_feature_id, protocol_app_node_id, max(value) as value
+ FROM results.NAFEATUREHOSTRESPONSE
+ GROUP BY na_feature_id, protocol_app_node_id
+ ), min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.value) over w1 as max_value,
+ first_value(result.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ from study.protocolappnode pan, study.NodeNodeSet sl, study.NodeSet s, result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, min_max, result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t1
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.mean_phenotype) over w1 as max_value,
+ first_value(result.mean_phenotype) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, apidb.CrisprPhenotype result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.mean_phenotype ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.mean_phenotype::numeric, 2)::varchar,'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.CrisprPhenotype result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND result.na_feature_id = min_max.na_feature_id
+ AND sl.node_set_id = min_max.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t2
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.score) over w1 as max_value,
+ first_value(result.score) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeScore result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.score ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.score::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeScore result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t3
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.relative_growth_rate) over w1 as max_value,
+ first_value(result.relative_growth_rate) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeGrowthRate result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.relative_growth_rate ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.relative_growth_rate::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeGrowthRate result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t4
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.value) over w1 as max_value,
+ first_value(result.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t5
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.percentile_channel1) over w1 as max_value,
+ first_value(result.percentile_channel1) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.percentile_channel1 ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'channel1_percentiles' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.percentile_channel1::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t6
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.percentile_channel2) over w1 as max_value,
+ first_value(result.percentile_channel2) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.percentile_channel2 ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'channel2_percentiles' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.percentile_channel2::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t7
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.standard_error) over w1 as max_value,
+ first_value(result.standard_error) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.standard_error ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'standard_error' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.standard_error::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t8
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.pvalue) over w1 as max_value,
+ first_value(result.pvalue) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.pvalue ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'pvalue' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.pvalue::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t9
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.amplitude) over w1 as max_value,
+ first_value(result.amplitude) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.amplitude ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'amplitude' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.amplitude::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t10
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.period) over w1 as max_value,
+ first_value(result.period) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.period ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'period' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.period::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t11
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.probability_mean) over w1 as max_value,
+ first_value(result.probability_mean) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.probability_mean ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'probability_mean' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.probability_mean::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t12
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.sd) over w1 as max_value,
+ first_value(result.sd) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.sd ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'sd' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.sd::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t13
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.lower_CI) over w1 as max_value,
+ first_value(result.lower_CI) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.lower_CI ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'lower_CI' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.lower_CI::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t14
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+ last_value(result.upper_CI) over w1 as max_value,
+ first_value(result.upper_CI) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.na_feature_id
+ ORDER BY result.upper_CI ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.na_feature_id, 'upper_CI' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.upper_CI::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.na_feature_id = result.na_feature_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t15
+-- TO FIX
+-- UNION ALL
+-- SELECT * FROM (
+-- WITH min_max AS (
+-- SELECT DISTINCT sl.node_set_id, result.na_feature_id,
+-- null as max_value,
+-- null as min_value,
+-- last_value(pan.name) over w1 as max_timepoint,
+-- first_value(pan.name) over w1 as min_timepoint
+-- FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.HaplotypeResult result
+-- WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+-- AND result.protocol_app_node_id = pan.protocol_app_node_id
+-- AND sl.node_set_id = s.node_set_id
+-- WINDOW w1 AS (
+-- PARTITION BY sl.node_set_id, result.na_feature_id
+-- ORDER BY result.value ASC
+-- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+-- )
+-- )
+-- SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type,
+-- string_agg(coalesce(result.value, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+-- min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+-- FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.HaplotypeResult result, min_max
+-- WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+-- AND result.protocol_app_node_id = pan.protocol_app_node_id
+-- AND sl.node_set_id = s.node_set_id
+-- AND min_max.na_feature_id = result.na_feature_id
+-- AND min_max.node_set_id = sl.node_set_id
+-- GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+-- ) t16
+ ) gene_profile
+ WHERE ga.na_feature_id = gene_profile.na_feature_id
+ UNION ALL
+ -- compound profiles
+ SELECT compound_profile.node_set_id,
+ case
+ WHEN compound_profile.isotopomer IS NOT NULL
+ THEN ca.source_id || '|' || compound_profile.isotopomer
+ WHEN compound_profile.mass IS NOT NULL
+ THEN ca.source_id || '|' || compound_profile.mass || '|' || compound_profile.retention_time
+ ELSE ca.source_id
+ END as source_id,
+ compound_profile.profile_type, compound_profile.node_type,
+ compound_profile.profile_as_string, compound_profile.max_value, compound_profile.min_value,
+ compound_profile.max_timepoint, compound_profile.min_timepoint
+ FROM CompoundAttributes ca,
+ (
+ SELECT * FROM (
+ WITH result AS (
+ SELECT max(value) as value, compound_id, protocol_app_node_id, isotopomer
+ FROM results.CompoundMassSpec
+ GROUP BY compound_id, protocol_app_node_id, isotopomer
+ ) , min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.compound_id, result.isotopomer,
+ last_value(result.value) over w1 as max_value,
+ first_value(result.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.compound_id, result.isotopomer
+ ORDER BY result.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.compound_id, result.isotopomer, 'values' as profile_type, s.node_type,
+ NULL::numeric as compound_peaks_id, NULL::numeric as mass, NULL::numeric as retention_time,
+ string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, result, min_max
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.compound_id = result.compound_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.compound_id, result.isotopomer, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t1
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, cpc.compound_id,
+ last_value(cmsr.value) over w1 as max_value,
+ first_value(cmsr.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp,
+ apidb.CompoundPeaksChebi cpc
+ WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id
+ AND cmsr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND cp.compound_peaks_id = cmsr.compound_peaks_id
+ AND cpc.compound_peaks_id = cp.compound_peaks_id
+ AND pan.name like '%mean%'
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time
+ ORDER BY cmsr.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, cpc.compound_id, cpc.isotopomer, 'values' as profile_type, s.node_type,
+ cpc.compound_peaks_id, cp.mass, cp.retention_time,
+ string_agg(coalesce(round(cmsr.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp,
+ apidb.CompoundPeaksChebi cpc, min_max
+ WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id
+ AND cmsr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND cp.compound_peaks_id = cmsr.compound_peaks_id
+ AND cpc.compound_peaks_id = cp.compound_peaks_id
+ AND pan.name like '%mean%'
+ AND min_max.compound_id = cpc.compound_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t2
+ UNION ALL
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, cpc.compound_id,
+ last_value(cmsr.percentile) over w1 as max_value,
+ first_value(cmsr.percentile) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp,
+ apidb.CompoundPeaksChebi cpc
+ WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id
+ AND cmsr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND cp.compound_peaks_id = cmsr.compound_peaks_id
+ AND cpc.compound_peaks_id = cp.compound_peaks_id
+ AND pan.name like '%mean%'
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time
+ ORDER BY cmsr.percentile ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, cpc.compound_id, cpc.isotopomer, 'percentiles' as profile_type, s.node_type,
+ cpc.compound_peaks_id, cp.mass, cp.retention_time,
+ string_agg(coalesce(round(cmsr.percentile::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp,
+ apidb.CompoundPeaksChebi cpc, min_max
+ WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id
+ AND cmsr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND cp.compound_peaks_id = cmsr.compound_peaks_id
+ AND cpc.compound_peaks_id = cp.compound_peaks_id
+ AND pan.name like '%mean%'
+ AND min_max.compound_id = cpc.compound_id
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t3
+ ) compound_profile
+ WHERE ca.id = compound_profile.compound_id
+ UNION ALL
+ -- OntologyTermResult
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, ot.name,
+ last_value(otr.value) over w1 as max_value,
+ first_value(otr.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.OntologyTermResult otr, sres.OntologyTerm ot
+ WHERE ot.ontology_term_id = otr.ontology_term_id
+ AND otr.protocol_app_node_id = sl.protocol_app_node_id
+ AND otr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, ot.name
+ ORDER BY otr.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, ot.name as source_id, 'value' as profile_type, s.node_type,
+ string_agg(coalesce(round(otr.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s,
+ apidb.OntologyTermResult otr, sres.OntologyTerm ot, min_max
+ WHERE ot.ontology_term_id = otr.ontology_term_id
+ AND otr.protocol_app_node_id = sl.protocol_app_node_id
+ AND otr.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.name = ot.name
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, ot.name, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t1
+ UNION ALL
+ -- SubjectResult
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.subject,
+ last_value(result.value) over w1 as max_value,
+ first_value(result.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.SubjectResult result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.subject
+ ORDER BY result.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.subject as source_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, min_max, apidb.SubjectResult result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.subject = result.subject
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.subject, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t2
+ UNION ALL
+ -- wgcna eigengene results
+ SELECT * FROM (
+ WITH min_max AS (
+ SELECT DISTINCT sl.node_set_id, result.module_name,
+ last_value(result.value) over w1 as max_value,
+ first_value(result.value) over w1 as min_value,
+ last_value(pan.name) over w1 as max_timepoint,
+ first_value(pan.name) over w1 as min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.EigenGeneWgcnaResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ WINDOW w1 AS (
+ PARTITION BY sl.node_set_id, result.module_name
+ ORDER BY result.value ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ )
+ SELECT sl.node_set_id, result.module_name as source_id, 'values' as profile_type, s.node_type,
+ string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string,
+ min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint
+ FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, min_max, apidb.EigenGeneWgcnaResults result
+ WHERE result.protocol_app_node_id = sl.protocol_app_node_id
+ AND result.protocol_app_node_id = pan.protocol_app_node_id
+ AND sl.node_set_id = s.node_set_id
+ AND min_max.module_name = result.module_name
+ AND min_max.node_set_id = sl.node_set_id
+ GROUP BY sl.node_set_id, result.module_name, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type
+ ) t3
+ ) profile
+ WHERE ds.name = d.name
+ AND ds.version = r.version
+ AND d.external_database_id = r.external_database_id
+ AND profile.node_set_id = ps.node_set_id
+ AND ps.external_database_release_id = r.external_database_release_id
+ )
+ LOOP
+ ctrows := ctrows + 1;
+ INSERT INTO Profile
+ (DATASET_NAME, DATASET_TYPE, DATASET_SUBTYPE, PROFILE_TYPE, NODE_TYPE, SOURCE_ID, PROFILE_STUDY_ID, PROFILE_SET_NAME,
+ PROFILE_SET_SUFFIX, PROFILE_AS_STRING, MAX_VALUE, MIN_VALUE, MAX_TIMEPOINT, MIN_TIMEPOINT)
+ VALUES
+ (pf_rows.DATASET_NAME, pf_rows.DATASET_TYPE, pf_rows.DATASET_SUBTYPE, pf_rows.PROFILE_TYPE, pf_rows.NODE_TYPE, pf_rows.SOURCE_ID, pf_rows.PROFILE_STUDY_ID, pf_rows.PROFILE_SET_NAME,
+ pf_rows.PROFILE_SET_SUFFIX, pf_rows.PROFILE_AS_STRING, pf_rows.MAX_VALUE, pf_rows.MIN_VALUE, pf_rows.MAX_TIMEPOINT, pf_rows.MIN_TIMEPOINT);
+ IF ctrows >= commit_after THEN
+ COMMIT;
+ ctrows := 0;
+ END IF;
+ END LOOP;
+ commit;
+ END;
+ $$ LANGUAGE PLPGSQL;
+
+ ;
+
+
+
+ -- Rename the Rijo circadian regulation dataset from its RNASeq_..._RSRC name to the ..._rnaSeq_RSRC name.
+ update Profile set dataset_name = 'tbruTREU927_Rijo_Circadian_Regulation_rnaSeq_RSRC'
+   where dataset_name = 'tbruTREU927_RNASeq_Rijo_Circadian_Regulation_RSRC'
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/ProfileSamples.psql b/Model/lib/psql/webtables/MO/ProfileSamples.psql
new file mode 100644
index 0000000000..e2ed3d0b2a
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ProfileSamples.psql
@@ -0,0 +1,167 @@
+:CREATE_AND_POPULATE
+ -- ProfileSamples lists the sample nodes (protocol app nodes) of each profile study.
+ -- Each UNION branch contributes the samples that have rows in one result table.
+ CREATE TABLE ProfileSamples AS
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id as study_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), -- inner call removes the first bracketed-then-parenthesised part of the node name
+ ' \(.+\)', '')AS protocol_app_node_name, -- outer call removes the first remaining parenthesised part
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, results.nafeatureexpression r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ AND pt.profile_type not in ('pvalue', 'period', 'amplitude', 'probability_mean','sd','lower_CI','upper_CI','correlation_coefficient') -- these profile types are left out of the nafeatureexpression branch
+ UNION -- samples with rows in apidb.NAFeatureMetacycle
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.NAFeatureMetacycle r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ AND pt.profile_type not in ('values', 'channel1_percentiles', 'channel2_percentiles', 'standard_error') -- these profile types are left out of the metacycle branch
+ UNION -- samples with rows in apidb.LopitResults
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.LopitResults r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ AND pt.profile_type not in ('values', 'channel1_percentiles', 'channel2_percentiles', 'standard_error') -- same exclusion list as the metacycle branch
+ UNION -- samples with rows in results.compoundMassSpec
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, results.compoundMassSpec r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION -- samples with rows in APIDB.compoundmassspecresult
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, APIDB.compoundmassspecresult r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ and pan.name like '%mean%' -- only nodes whose name contains the text mean
+ UNION -- samples with rows in apidb.ontologytermresult
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.ontologytermresult r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION -- samples with rows in results.nafeaturehostresponse
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, results.nafeaturehostresponse r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION -- samples with rows in apidb.crisprphenotype
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.crisprphenotype r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION -- samples with rows in apidb.phenotypescore
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.phenotypescore r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION -- samples with rows in apidb.phenotypegrowthrate
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.phenotypegrowthrate r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION -- samples with rows in apidb.subjectresult
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.subjectresult r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+ UNION -- samples with rows in apidb.EigenGeneWgcnaResults
+ SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id,
+ REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''),
+ ' \(.+\)', '')AS protocol_app_node_name,
+ pan.protocol_app_node_id, pan.node_order_num, pt.profile_type,
+ pt.dataset_name, pt.dataset_type, pt.dataset_subtype,
+ pt.profile_set_suffix
+ FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl,
+ study.protocolAppNode pan, apidb.EigenGeneWgcnaResults r
+ WHERE pt.profile_study_id = s.node_set_id
+ AND sl.node_set_id = s.node_set_id
+ AND sl.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id =r.protocol_app_node_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
new file mode 100644
index 0000000000..f9f068e65a
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
@@ -0,0 +1,22 @@
+
+
+ -- Index on ProfileSamples led by dataset_name and profile_type.
+ CREATE INDEX psamp_ix ON ProfileSamples (
+     dataset_name, profile_type, study_id, node_order_num, protocol_app_node_id,
+     profile_set_suffix, study_name, node_type, protocol_app_node_name
+ )
+
+
+ ;
+
+
+
+ -- Index on ProfileSamples led by study_name, node_type and profile_type.
+ CREATE INDEX psampstdy_ix ON ProfileSamples (
+     study_name, node_type, profile_type, node_order_num, protocol_app_node_id,
+     profile_set_suffix, study_id, protocol_app_node_name, dataset_name
+ )
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/ProfileType.psql b/Model/lib/psql/webtables/MO/ProfileType.psql
new file mode 100644
index 0000000000..0f0f83616c
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ProfileType.psql
@@ -0,0 +1,13 @@
+:CREATE_AND_POPULATE
+ -- Distinct profile descriptors among Profile rows whose profile_as_string is not null.
+ create table ProfileType as
+ select dataset_name, profile_study_id, profile_set_name, profile_set_suffix,
+     node_type, profile_type, dataset_type, dataset_subtype
+ from profile where profile_as_string is not null
+ group by dataset_name, profile_study_id, profile_set_name, profile_set_suffix,
+     node_type, profile_type, dataset_type, dataset_subtype
+ order by dataset_name, profile_set_name, profile_type
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/ProfileType_ix.psql b/Model/lib/psql/webtables/MO/ProfileType_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/Profile_ix.psql b/Model/lib/psql/webtables/MO/Profile_ix.psql
new file mode 100644
index 0000000000..25c977a2e7
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/Profile_ix.psql
@@ -0,0 +1,24 @@
+
+
+ -- Index for Profile lookups led by source_id, then profile_type and profile_set_name.
+ CREATE INDEX exprof_idx ON Profile (source_id, profile_type, profile_set_name)
+
+
+ ;
+
+
+
+ -- Index for Profile lookups led by profile_set_name, then profile_type.
+ CREATE INDEX profset_idx ON Profile (profile_set_name, profile_type)
+
+
+ ;
+
+
+
+ -- Index for Profile lookups led by source_id, then dataset_subtype and dataset_type.
+ CREATE INDEX srcdset_idx ON Profile (source_id, dataset_subtype, dataset_type)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webtables/MO/ProteinAttributes.psql
new file mode 100644
index 0000000000..de8bb59b53
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ProteinAttributes.psql
@@ -0,0 +1,207 @@
+
+
+ -- GO term names and ids per protein, ontology and evidence source, joined with semicolons.
+ CREATE TABLE :ORG_ABBREVGoTermList AS
+ WITH classified AS (
+     SELECT aa_sequence_id, ontology, go_term_name, go_id,
+         CASE WHEN evidence_code = 'IEA' THEN 'predicted' ELSE 'annotated' END AS source
+     FROM :ORG_ABBREVGeneGoTerms )
+ SELECT aa_sequence_id, ontology, source,
+     string_agg(go_term_name, ';' ORDER BY go_term_name) AS go_terms,
+     string_agg(go_id, ';' ORDER BY go_term_name) AS go_ids
+ FROM classified GROUP BY aa_sequence_id, ontology, source
+
+ ;
+
+
+
+ -- One row per protein listed in GoTermSummary, carrying its annotated and predicted
+ -- GO term names and GO ids for each of three ontologies (Cellular Component,
+ -- Molecular Function, Biological Process). Every value is cut to 300 characters.
+ -- Each GoTermList join is limited to one source and one ontology in its ON clause,
+ -- so proteins without a matching list row are kept with NULL in those columns.
+ CREATE TABLE :ORG_ABBREVProteinGoAttributes AS
+ SELECT DISTINCT prot.aa_sequence_id,
+     -- GO term names
+     substr(ann_cc.go_terms, 1, 300) AS annotated_go_component,
+     substr(ann_mf.go_terms, 1, 300) AS annotated_go_function,
+     substr(ann_bp.go_terms, 1, 300) AS annotated_go_process,
+     substr(pred_cc.go_terms, 1, 300) AS predicted_go_component,
+     substr(pred_mf.go_terms, 1, 300) AS predicted_go_function,
+     substr(pred_bp.go_terms, 1, 300) AS predicted_go_process,
+     -- GO ids
+     substr(ann_cc.go_ids, 1, 300) AS annotated_go_id_component,
+     substr(ann_mf.go_ids, 1, 300) AS annotated_go_id_function,
+     substr(ann_bp.go_ids, 1, 300) AS annotated_go_id_process,
+     substr(pred_cc.go_ids, 1, 300) AS predicted_go_id_component,
+     substr(pred_mf.go_ids, 1, 300) AS predicted_go_id_function,
+     substr(pred_bp.go_ids, 1, 300) AS predicted_go_id_process
+ -- driving set, every protein with at least one GoTermSummary row
+ FROM (
+     SELECT DISTINCT aa_sequence_id
+     FROM :ORG_ABBREVGoTermSummary
+ ) prot
+ -- annotated lists
+ -- annotated, Cellular Component
+ LEFT JOIN :ORG_ABBREVGoTermList ann_cc
+     ON ann_cc.aa_sequence_id = prot.aa_sequence_id
+     AND ann_cc.source = 'annotated'
+     AND ann_cc.ontology = 'Cellular Component'
+ -- annotated, Molecular Function
+ LEFT JOIN :ORG_ABBREVGoTermList ann_mf
+     ON ann_mf.aa_sequence_id = prot.aa_sequence_id
+     AND ann_mf.source = 'annotated'
+     AND ann_mf.ontology = 'Molecular Function'
+ -- annotated, Biological Process
+ LEFT JOIN :ORG_ABBREVGoTermList ann_bp
+     ON ann_bp.aa_sequence_id = prot.aa_sequence_id
+     AND ann_bp.source = 'annotated'
+     AND ann_bp.ontology = 'Biological Process'
+ -- predicted lists
+ -- predicted, Cellular Component
+ LEFT JOIN :ORG_ABBREVGoTermList pred_cc
+     ON pred_cc.aa_sequence_id = prot.aa_sequence_id
+     AND pred_cc.source = 'predicted'
+     AND pred_cc.ontology = 'Cellular Component'
+ -- predicted, Molecular Function
+ LEFT JOIN :ORG_ABBREVGoTermList pred_mf
+     ON pred_mf.aa_sequence_id = prot.aa_sequence_id
+     AND pred_mf.source = 'predicted'
+     AND pred_mf.ontology = 'Molecular Function'
+ -- predicted, Biological Process
+ LEFT JOIN :ORG_ABBREVGoTermList pred_bp
+     ON pred_bp.aa_sequence_id = prot.aa_sequence_id
+     AND pred_bp.source = 'predicted'
+     AND pred_bp.ontology = 'Biological Process'
+
+ ;
+
+
+
+ -- Index ProteinGoAttributes by aa_sequence_id.
+ CREATE INDEX ProteinGoAttr_aaSequenceId
+     ON :ORG_ABBREVProteinGoAttributes (aa_sequence_id)
+ ;
+
+
+
+ -- Per protein, sorted EC number plus description labels whose evidence_code is not OrthoMCLDerived, cut to 300 characters.
+ CREATE TABLE :ORG_ABBREVtProteinAttrsEc AS
+ WITH ec_label AS (
+     SELECT DISTINCT asec.aa_sequence_id, ec.ec_number || ' (' || ec.description || ')' AS ec_number
+     FROM dots.AaSequenceEnzymeClass asec
+     INNER JOIN sres.EnzymeClass ec ON ec.enzyme_class_id = asec.enzyme_class_id
+     WHERE asec.evidence_code <> 'OrthoMCLDerived' )
+ SELECT aa_sequence_id, substr(string_agg(ec_number, ';' ORDER BY ec_number), 1, 300) AS ec_numbers
+ FROM ec_label GROUP BY aa_sequence_id
+
+ ;
+
+
+
+ -- Per protein, sorted EC number plus description labels whose evidence_code is OrthoMCLDerived, cut to 300 characters.
+ CREATE TABLE :ORG_ABBREVtProteinAttrsEcDerived AS
+ WITH ec_label AS (
+     SELECT DISTINCT asec.aa_sequence_id, ec.ec_number || ' (' || ec.description || ')' AS ec_number
+     FROM dots.AaSequenceEnzymeClass asec
+     INNER JOIN sres.EnzymeClass ec ON ec.enzyme_class_id = asec.enzyme_class_id
+     WHERE asec.evidence_code = 'OrthoMCLDerived' )
+ SELECT aa_sequence_id, substr(string_agg(ec_number, ';' ORDER BY ec_number), 1, 300) AS ec_numbers_derived
+ FROM ec_label GROUP BY aa_sequence_id
+
+ ;
+
+:CREATE_AND_POPULATE
+ -- Protein-level attributes. Joins each transcript's translated protein to its gene, CDS
+ -- location, sequence properties, signal peptide, EC numbers, GO terms and UniProt ids.
+ CREATE TABLE :ORG_ABBREVProteinAttributes AS
+ SELECT pi.name as project_id,
+ tas.source_id, tas.aa_sequence_id,
+ t.source_id as transcript_source_id,
+ gf.source_id as gene_source_id,
+ cdsl.na_sequence_id as na_sequence_id,
+ cdsl.is_reversed,
+ cdsl.start_min as cds_start,
+ cdsl.end_max as cds_end,
+ (taf.translation_stop - taf.translation_start) + 1 AS cds_length,
+ length(tas.sequence) AS protein_length,
+ coalesce(transmembrane.tm_domains, 0) AS tm_count,
+ tas.molecular_weight,
+ asa.min_molecular_weight, asa.max_molecular_weight,
+ asa.isoelectric_point, asa.hydropathicity_gravy_score,
+ asa.aromaticity_score,
+ SUBSTR(sigp.peptide_sequence, 1, 200) as signalp_peptide,
+ ec_numbers,
+ ec_numbers_derived,
+ go.annotated_go_component, go.annotated_go_function, go.annotated_go_process,
+ go.predicted_go_component, go.predicted_go_function, go.predicted_go_process,
+ go.annotated_go_id_component, go.annotated_go_id_function, go.annotated_go_id_process,
+ go.predicted_go_id_component, go.predicted_go_id_function, go.predicted_go_id_process,
+ SUBSTR(coalesce(rt1.anticodon, rt2.anticodon), 1, 3) AS anticodon,
+ -- has_seqedit starts at 0 here and is set to 1 by the update that follows this statement
+ 0 AS has_seqedit,
+ row_number() over (partition by t.source_id order by tas.length desc) as rank_in_transcript,
+ uniprot.uniprot_ids
+ FROM
+ core.ProjectInfo pi
+ INNER JOIN dots.Transcript t ON t.row_project_id = pi.project_id
+ INNER JOIN dots.GeneFeature gf ON gf.na_feature_id = t.parent_id
+ INNER JOIN dots.TranslatedAaFeature taf ON t.na_feature_id = taf.na_feature_id
+ INNER JOIN dots.TranslatedAaSequence tas ON taf.aa_sequence_id = tas.aa_sequence_id
+ LEFT JOIN dots.RnaType rt2 ON gf.na_feature_id = rt2.parent_id
+ LEFT JOIN dots.RnaType rt1 ON t.na_feature_id = rt1.parent_id
+ LEFT JOIN apidb.AaSequenceAttribute asa ON taf.aa_sequence_id = asa.aa_sequence_id
+ LEFT JOIN :ORG_ABBREVProteinGoAttributes go ON tas.aa_sequence_id = go.aa_sequence_id
+ -- distinct signal peptide sequences per protein, comma separated
+ LEFT JOIN (
+ SELECT aa_sequence_id, string_agg(peptide_sequence, ', ') peptide_sequence
+ FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :ORG_ABBREVSignalPeptideDomains) t
+ GROUP BY aa_sequence_id
+ ) sigp ON tas.aa_sequence_id = sigp.aa_sequence_id
+ -- overall CDS extent per protein, top-level locations only
+ LEFT JOIN (
+ SELECT protein_source_id, na_sequence_id, is_reversed,
+ MIN(start_min) AS start_min, MAX(end_max) AS end_max
+ FROM apidb.CdsLocation WHERE is_top_level=1
+ GROUP BY protein_source_id, na_sequence_id, is_reversed
+ ) cdsl ON tas.source_id = cdsl.protein_source_id
+ -- number of transmembrane feature locations per protein
+ LEFT JOIN (
+ SELECT aa_sequence_id, max(tm_domains) AS tm_domains
+ FROM (SELECT tmaf.aa_sequence_id, COUNT(*) AS tm_domains
+ FROM dots.TransmembraneAaFeature tmaf, dots.AaLocation al
+ WHERE tmaf.aa_feature_id = al.aa_feature_id
+ GROUP BY tmaf.aa_sequence_id) tms
+ GROUP BY tms.aa_sequence_id
+ ) transmembrane ON tas.aa_sequence_id = transmembrane.aa_sequence_id
+ -- The EC tables are created earlier in this file with an extra t after the organism
+ -- prefix (tProteinAttrsEc and tProteinAttrsEcDerived), so the joins use those names.
+ -- Joining the names without the t would reference tables that are never created.
+ LEFT JOIN :ORG_ABBREVtProteinAttrsEc ec ON tas.aa_sequence_id = ec.aa_sequence_id
+ LEFT JOIN :ORG_ABBREVtProteinAttrsEcDerived ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id
+ -- UniProt identifiers per protein, comma separated
+ LEFT JOIN (
+ SELECT af.aa_sequence_id,
+ string_agg(dbref.primary_identifier, ',' order by dbref.primary_identifier) as uniprot_ids
+ FROM sres.ExternalDatabase d, sres.ExternalDatabaseRelease r,
+ sres.DbRef, dots.DbRefAaFeature daf, dots.AaFeature af
+ WHERE d.name like 'Uniprot%'
+ AND d.external_database_id = r.external_database_id
+ AND r.external_database_release_id = dbref.external_database_release_id
+ AND dbref.db_ref_id = daf.db_ref_id
+ AND daf.aa_feature_id = af.aa_feature_id
+ GROUP BY af.aa_sequence_id
+ ) uniprot ON tas.aa_sequence_id = uniprot.aa_sequence_id
+ ORDER BY tas.source_id
+
+
+:DECLARE_PARTITION;
+
+
+
+ -- Flag the proteins whose source_id appears in apidb.seqedit.
+ UPDATE :ORG_ABBREVProteinAttributes gaup SET has_seqedit = 1
+ WHERE EXISTS (SELECT 1 FROM apidb.seqedit se WHERE se.source_id = gaup.source_id)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
new file mode 100644
index 0000000000..f9899e7f80
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
@@ -0,0 +1,14 @@
+
+
+ -- Index ProteinAttributes by source_id.
+ create index PA_sourceId
+     on :ORG_ABBREVProteinAttributes (source_id)
+ ;
+
+
+
+ -- Index ProteinAttributes by aa_sequence_id.
+ create index PA_aaSequenceId
+     on :ORG_ABBREVProteinAttributes (aa_sequence_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/ProteinSequence.psql b/Model/lib/psql/webtables/MO/ProteinSequence.psql
new file mode 100644
index 0000000000..937e695a3f
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ProteinSequence.psql
@@ -0,0 +1,15 @@
+:CREATE_AND_POPULATE
+
+
+ -- Sequence and project name for each distinct source_id and aa_sequence_id pair in ProteinAttributes.
+ CREATE TABLE ProteinSequence AS
+ SELECT prot.source_id, proj.name AS project_id, seq.sequence
+ FROM (SELECT DISTINCT source_id, aa_sequence_id FROM ProteinAttributes) prot
+ INNER JOIN dots.TranslatedAaSequence seq
+     ON seq.aa_sequence_id = prot.aa_sequence_id
+ INNER JOIN core.Projectinfo proj
+     ON proj.project_id = seq.row_project_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
new file mode 100644
index 0000000000..98f1c06b2d
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
@@ -0,0 +1,7 @@
+
+
+ -- Index ProteinSequence by source_id and project_id.
+ CREATE INDEX ProtSeq_ix
+     ON ProteinSequence (source_id, project_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/RnaSeqStats.psql b/Model/lib/psql/webtables/MO/RnaSeqStats.psql
new file mode 100644
index 0000000000..33964a796c
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/RnaSeqStats.psql
@@ -0,0 +1,55 @@
+:CREATE_AND_POPULATE
+
+
+ -- Average read count per study, dataset and taxon, computed from study.Characteristic
+ -- values that are selected by the ontology term of their qualifier.
+ -- Two term sets are combined with UNION.
+ CREATE TABLE RnaSeqStats AS
+ -- first term set, plain average
+ SELECT study_id, study_name, dataset_name, taxon_id,
+     round(avg(num_reads::integer), 0) AS avg_unique_reads
+ FROM (
+     SELECT sl.node_set_id AS study_id,
+         s.name || '[' || s.node_type || ']' AS study_name,
+         ed.name AS dataset_name,
+         ds.taxon_id,
+         c.value AS num_reads
+     FROM apidb.Datasource ds
+     INNER JOIN sres.ExternalDatabase ed ON ed.name = ds.external_database_name
+     INNER JOIN sres.ExternalDatabaseRelease edr ON edr.external_database_id = ed.external_database_id
+     INNER JOIN study.nodeSet s ON s.external_database_release_id = edr.external_database_release_id
+     INNER JOIN study.nodeNodeSet sl ON sl.node_set_id = s.node_set_id
+     INNER JOIN study.ProtocolAppNode pan ON pan.protocol_app_node_id = sl.protocol_app_node_id
+     INNER JOIN study.Characteristic c ON c.protocol_app_node_id = pan.protocol_app_node_id
+     INNER JOIN sres.OntologyTerm ot ON ot.ontology_term_id = c.qualifier_id
+     WHERE ot.source_id IN ('EUPATH_0000460', 'EuPathUserDefined_00507')
+ ) subquery1
+ GROUP BY study_id, study_name, dataset_name, taxon_id
+ UNION
+ -- second term set, average multiplied by two
+ SELECT study_id, study_name, dataset_name, taxon_id,
+     round(2 * avg(num_reads::integer), 0) AS avg_unique_reads
+ FROM (
+     SELECT sl.node_set_id AS study_id,
+         s.name || '[' || s.node_type || ']' AS study_name,
+         ed.name AS dataset_name,
+         ds.taxon_id,
+         c.value AS num_reads
+     FROM apidb.Datasource ds
+     INNER JOIN sres.ExternalDatabase ed ON ed.name = ds.external_database_name
+     INNER JOIN sres.ExternalDatabaseRelease edr ON edr.external_database_id = ed.external_database_id
+     INNER JOIN study.nodeSet s ON s.external_database_release_id = edr.external_database_release_id
+     INNER JOIN study.nodeNodeSet sl ON sl.node_set_id = s.node_set_id
+     INNER JOIN study.ProtocolAppNode pan ON pan.protocol_app_node_id = sl.protocol_app_node_id
+     INNER JOIN study.Characteristic c ON c.protocol_app_node_id = pan.protocol_app_node_id
+     INNER JOIN sres.OntologyTerm ot ON ot.ontology_term_id = c.qualifier_id
+     WHERE ot.source_id IN (
+         'EUPATH_0000468', 'EuPathUserDefined_00515',
+         'EUPATH_0000476', 'EuPathUserDefined_00523'
+     )
+ ) subquery2
+ GROUP BY study_id, study_name, dataset_name, taxon_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/RnaSeqStats_ix.psql b/Model/lib/psql/webtables/MO/RnaSeqStats_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql
new file mode 100644
index 0000000000..80e45141ca
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql
@@ -0,0 +1,41 @@
+:CREATE_AND_POPULATE
+
+
+      CREATE TABLE :ORG_ABBREVSignalPeptideDomains AS
+      -- Signal peptide features that pass the score filter below, with gene, transcript and location.
+      SELECT
+        gf.source_id gene_source_id
+        , t.source_id transcript_source_id
+        , taf.na_feature_id
+        , spf.aa_feature_id
+        , spf.aa_sequence_id
+        , spf.parent_id
+        , aal.start_min
+        , aal.end_max
+        , spf.algorithm_name
+        -- residues 1 through end_max, because Postgres substr() takes (string, start, count)
+        , substr(s.sequence, 1, aal.end_max::INTEGER) peptide_sequence
+      FROM dots.SignalPeptideFeature spf
+        , dots.AaLocation aal
+        , dots.TranslatedAaFeature taf
+        , dots.TranslatedAaSequence tas
+        , dots.GeneFeature gf
+        , dots.AaSequence s
+        , dots.Transcript t
+      WHERE spf.aa_sequence_id = s.aa_sequence_id
+        AND aal.aa_feature_id = spf.aa_feature_id
+        AND t.na_feature_id = taf.na_feature_id
+        AND taf.aa_sequence_id = tas.aa_sequence_id
+        AND tas.aa_sequence_id = spf.aa_sequence_id
+        AND gf.na_feature_id = t.parent_id
+        AND (spf.signal_probability >= .5
+          OR spf.signal_probability IS NULL
+          OR ((spf.means_score + spf.maxy_score) / 2) >= .5
+          OR ( spf.maxy_conclusion + spf.maxc_conclusion + spf.maxs_conclusion + spf.means_conclusion ) >= 3
+          )
+      ORDER BY
+        spf.aa_sequence_id, spf.aa_feature_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
new file mode 100644
index 0000000000..b305c86713
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
@@ -0,0 +1,16 @@
+
+
+      CREATE INDEX SignalP1_ix  -- supports lookups by aa_sequence_id
+        ON :ORG_ABBREVSignalPeptideDomains (aa_sequence_id)
+
+
+      ;
+
+
+
+      CREATE INDEX SignalP2_ix  -- supports lookups by gene, then transcript, then end_max
+        ON :ORG_ABBREVSignalPeptideDomains (gene_source_id, transcript_source_id, end_max)
+
+
+      ;
+
diff --git a/Model/lib/psql/webtables/MO/TFBSGene.psql b/Model/lib/psql/webtables/MO/TFBSGene.psql
new file mode 100644
index 0000000000..3a7103d8ac
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TFBSGene.psql
@@ -0,0 +1,46 @@
+:CREATE_AND_POPULATE
+
+
+      CREATE TABLE TFBSGene AS  -- binding sites paired with genes whose start is within 3000 of the site midpoint
+      SELECT DISTINCT
+             ga.source_id as gene_source_id,
+             ga.organism as organism,
+             ga.genus_species as species,
+             aef.source_id as probe_id,
+             aef.na_feature_id as tfbs_na_feature_id,
+             CASE  -- gene start is start_min when is_reversed = 0, otherwise end_max
+               WHEN ga.is_reversed = 0
+               THEN round(abs(ga.start_min - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)),0)
+               ELSE round(abs(ga.end_max - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)),0)
+             END as distance,
+             CASE
+               WHEN /* distance > 0 */
+                 CASE WHEN ga.is_reversed = 0
+                   THEN ga.start_min - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)
+                   ELSE ga.end_max - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)
+                 END > 0
+               THEN
+                 CASE
+                   WHEN ga.is_reversed = 0
+                   THEN '-'
+                   ELSE '+'
+                 END
+               ELSE
+                 CASE
+                   WHEN ga.is_reversed = 1
+                   THEN '-'
+                   ELSE '+'
+                 END
+             END as direction,  -- minus when the site midpoint is on the upstream side of the gene start, plus on the downstream side
+             aef.*
+      FROM dots.BindingSiteFeature aef,
+           apidb.FeatureLocation arrloc,
+           GeneAttributes ga
+      WHERE aef.na_feature_id = arrloc.na_feature_id
+        AND arrloc.na_sequence_id = ga.na_sequence_id  -- site and gene must share a sequence
+        AND (   (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000)
+             or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) )
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
new file mode 100644
index 0000000000..c350a43aa8
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
@@ -0,0 +1,14 @@
+
+
+      create index tfbs_geneid_idx ON TFBSGene (gene_source_id, tfbs_na_feature_id)  -- gene first, then binding site
+
+
+      ;
+
+
+
+      create index geneid_tfbs_idx ON TFBSGene (tfbs_na_feature_id,gene_source_id)  -- same columns, binding site first
+
+
+      ;
+
diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies.psql b/Model/lib/psql/webtables/MO/TaxonSpecies.psql
new file mode 100644
index 0000000000..7ef89cd520
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TaxonSpecies.psql
@@ -0,0 +1,35 @@
+
+
+      CREATE UNLOGGED TABLE :ORG_ABBREVTaxonOfInterest AS
+      -- taxon ids of genomic sequences, plus taxon ids of sequences that have an Est row
+      SELECT gsa.taxon_id
+        FROM :ORG_ABBREVGenomicSeqAttributes gsa
+      UNION
+      SELECT ns.taxon_id
+        FROM dots.NaSequence ns INNER JOIN dots.Est est ON est.na_sequence_id = ns.na_sequence_id
+
+      ;
+
+:CREATE_AND_POPULATE
+
+
+      CREATE TABLE :ORG_ABBREVTaxonSpecies as
+      -- Climb the taxon tree from every taxon of interest (starting at the taxon
+      -- itself) and keep each visited node whose rank is "species".
+      -- TODO pick a single max/min level when several species-rank nodes exist.
+      WITH RECURSIVE lineage AS (
+        SELECT st.taxon_id, st.taxon_id AS ancestor_id, 1 AS lvl
+        FROM sres.taxon st
+        WHERE st.taxon_id IN (SELECT toi.taxon_id FROM :ORG_ABBREVTaxonofinterest toi)
+        UNION ALL
+        SELECT lin.taxon_id, up.parent_id, lin.lvl + 1
+        FROM lineage lin INNER JOIN sres.taxon up ON lin.ancestor_id = up.taxon_id
+      )
+      SELECT lin.taxon_id, lin.ancestor_id AS species_taxon_id
+      FROM lineage lin
+      INNER JOIN sres.taxon sp ON sp.taxon_id = lin.ancestor_id
+      WHERE sp.rank = 'species'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies_ix.psql b/Model/lib/psql/webtables/MO/TaxonSpecies_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/Taxonomy.psql b/Model/lib/psql/webtables/MO/Taxonomy.psql
new file mode 100644
index 0000000000..249eda7162
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/Taxonomy.psql
@@ -0,0 +1,28 @@
+:CREATE_AND_POPULATE
+
+
+      CREATE TABLE Taxonomy as
+      WITH RECURSIVE cte AS (
+        WITH tax AS(  -- every taxon together with its scientific name
+          SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id,
+             cast(tn.name as varchar(80)) as name,
+             cast(t.rank as varchar(24)) as rank
+          FROM sres.Taxon t, sres.TaxonName tn
+          WHERE t.taxon_id = tn.taxon_id
+            AND tn.name_class = 'scientific name'
+        )
+        SELECT tax.*, name as organism, ARRAY[taxon_id::numeric] as path  -- start at each organism named in GeneAttributes
+        FROM tax
+        WHERE name IN (SELECT DISTINCT organism FROM GeneAttributes)
+        UNION
+        SELECT tax.*, cte.organism, cte.path || tax.taxon_id as path  -- then append each ancestor, excluding the root taxon
+        FROM tax, cte
+        WHERE cte.parent_id = tax.taxon_id
+          AND tax.name != 'root'
+      )
+      SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over (ORDER BY path) as orderNum
+      FROM cte  -- numbering is ordered inside the window, a bare over() would leave the order unspecified
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
new file mode 100644
index 0000000000..84083eda42
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
@@ -0,0 +1,9 @@
+
+
+      create index tax_ix  -- leads with organism and ordernum, and includes every column of Taxonomy
+        on Taxonomy
+           (organism, ordernum, taxon_id, parent_id, ncbi_tax_id, name, rank)
+
+
+      ;
+
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
new file mode 100644
index 0000000000..388d722537
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
@@ -0,0 +1,440 @@
+
+
+      CREATE table :ORG_ABBREVTranscriptUniprot AS
+      -- per transcript, its UniProt ids as a comma list and as a +or+ list, each cut to 240 characters
+      SELECT ids.na_feature_id,
+             substr(string_agg(ids.uniprot_id, ',' order by ids.uniprot_id), 1, 240) as uniprot_id,
+             substr(string_agg(ids.uniprot_id, '+or+' order by ids.uniprot_id), 1, 240) as uniprot_id_internal
+      FROM (SELECT DISTINCT t.na_feature_id, dr.primary_identifier as uniprot_id
+            FROM sres.DbRef dr
+            INNER JOIN dots.DbRefNaFeature x ON dr.db_ref_id = x.db_ref_id
+            -- a reference counts whether it hangs off the transcript or off its parent feature
+            INNER JOIN dots.Transcript t ON x.na_feature_id IN (t.na_feature_id, t.parent_id)
+            INNER JOIN sres.ExternalDatabaseRelease r ON dr.external_database_release_id = r.external_database_release_id
+            INNER JOIN sres.ExternalDatabase d ON r.external_database_id = d.external_database_id
+            WHERE d.name like '%uniprot_dbxref_RSRC'
+               OR d.name like '%dbxref_gene2Uniprot_RSRC'
+               OR d.name = 'Links to Uniprot Genes'
+               OR d.name like '%_dbxref_uniprot_linkout_RSRC'
+               OR d.name like '%_dbxref_uniprot_from_annotation_RSRC'
+           ) ids
+      GROUP BY ids.na_feature_id
+
+      ;
+
+:CREATE_AND_POPULATE
+
+
+      CREATE TABLE :ORG_ABBREVTranscriptAttributes AS
+      WITH genefeat AS (  -- gene-level attributes of top-level, non-predicted gene features
+          SELECT DISTINCT
+              cast(apidb.prefixed_project_id(tn.name, ':ORG_ABBREV') as varchar(20)) as project_id,
+              -- first the gene attributes:
+              gf.source_id AS gene_source_id,
+              gf.na_feature_id AS gene_na_feature_id,
+              LEAST(nl.start_min, nl.end_max) AS gene_start_min,
+              GREATEST(nl.start_min, nl.end_max) AS gene_end_max,
+              COALESCE(preferred_name.name, any_name.name) AS gene_name,
+              cast(coalesce(preferred_gene_product.product, any_gene_product.product, gf.product)
+                   as VARCHAR(300)) as old_gene_product,
+              COALESCE(gp.product, 'unspecified product') as gene_product,
+              REPLACE(so.name, '_', ' ') AS gene_type,
+              gf.name as gene_ebi_biotype,
+              gi.gene_id,
+              transcripts.gene_transcript_count,
+              exons.gene_exon_count,
+              olds.old_ids AS gene_previous_ids,
+              coalesce(deprecated.is_deprecated, 0) as is_deprecated,
+              GREATEST(1, least(nl.start_min, nl.end_max) - 15000) AS gene_context_start,
+              LEAST(gsa.length, greatest(nl.start_min, nl.end_max) + 15000) AS gene_context_end,
+              GREATEST(1, least(nl.start_min, nl.end_max) - 1500) AS gene_zoom_context_start,
+              LEAST(gsa.length, greatest(nl.start_min, nl.end_max) + 1500) AS gene_zoom_context_end,
+              CAST(orthologs.name AS VARCHAR(60)) AS orthomcl_name,
+              coalesce(tothtssnps.total_hts_snps,0) AS gene_total_hts_snps,
+              coalesce(tothtssnps.hts_nonsynonymous_snps,0) AS gene_hts_nonsynonymous_snps,
+              coalesce(tothtssnps.hts_stop_codon_snps,0) AS gene_hts_stop_codon_snps,
+              coalesce(tothtssnps.hts_noncoding_snps,0) AS gene_hts_noncoding_snps,
+              coalesce(tothtssnps.hts_synonymous_snps,0) AS gene_hts_synonymous_snps,
+              coalesce(tothtssnps.hts_nonsyn_syn_ratio,0) AS gene_hts_nonsyn_syn_ratio,
+              CAST(cmnt.comment_string AS VARCHAR(300)) AS comment_string,
+              entrez_table.entrez_id AS gene_entrez_id,
+              gloc.locations AS gene_locations,
+              CAST(gsa.source_id AS VARCHAR(50)) AS sequence_id,
+              CAST(SUBSTR(tn.name, 1, 80) AS VARCHAR(80)) AS organism,
+              CAST(species_name.name AS VARCHAR(60)) AS species,
+              LTRIM(REGEXP_REPLACE(tn.name, replace (replace (species_name.name,'[',''), ']','') ,'')) AS strain,
+              taxon.ncbi_tax_id, tn.taxon_id,
+              so.source_id as so_id,
+              CAST(so.name AS VARCHAR(150)) AS so_term_name,
+              CAST(SUBSTR(so.definition, 1, 150) AS VARCHAR(150)) AS so_term_definition,
+              CAST(soRls.version AS VARCHAR(7)) AS so_version,
+              rt2.anticodon rt2_anticodon,
+              ed.name AS external_db_name,
+              edr.version AS external_db_version,
+              edr.external_database_release_id AS external_db_rls_id,
+              CAST(gsa.chromosome AS VARCHAR(20)) AS chromosome,
+              gsa.sequence_type,
+              gsa.chromosome_order_num, gsa.na_sequence_id
+          FROM dots.GeneFeature gf
+          INNER JOIN apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id
+          INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id
+          INNER JOIN :ORG_ABBREVGeneLocations gloc ON gf.source_id = gloc.source_id
+          LEFT JOIN :ORG_ABBREVGeneProduct gp ON gf.source_id = gp.source_id
+          INNER JOIN sres.ExternalDatabaseRelease edr ON gf.external_database_release_id = edr.external_database_release_id
+          INNER JOIN sres.ExternalDatabase ed ON edr.external_database_id = ed.external_database_id
+          INNER JOIN :ORG_ABBREVGenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id
+          INNER JOIN sres.TaxonName tn ON gsa.taxon_id = tn.taxon_id
+          INNER JOIN sres.Taxon ON gsa.taxon_id = taxon.taxon_id
+          INNER JOIN sres.externalDatabaseRelease soRls ON so.external_database_release_id = soRls.external_database_release_id
+          INNER JOIN (
+             SELECT DISTINCT gene AS source_id FROM :ORG_ABBREVGeneId
+          ) gene ON gf.source_id = gene.source_id
+          LEFT JOIN dots.RnaType rt2 ON gf.na_feature_id = rt2.parent_id
+          LEFT JOIN :ORG_ABBREVTaxonSpecies ts ON gsa.taxon_id = ts.taxon_id
+          LEFT JOIN dots.geneinstance gi ON gf.na_feature_id = gi.na_feature_id
+          INNER JOIN sres.TaxonName species_name ON ts.species_taxon_id = species_name.taxon_id
+          LEFT JOIN (
+             SELECT parent_id, count(*) AS gene_transcript_count
+             FROM dots.Transcript
+             GROUP BY parent_id
+          ) transcripts ON gf.na_feature_id = transcripts.parent_id
+          LEFT JOIN (
+             SELECT parent_id, count(*) AS gene_exon_count
+             FROM dots.ExonFeature
+             GROUP BY parent_id
+          ) exons ON gf.na_feature_id = exons.parent_id
+          LEFT JOIN (  -- first 300 characters of one comment per feature, as Postgres substr() is (string, start, count)
+             SELECT nfc.na_feature_id, MAX(SUBSTR(nfc.comment_string, 1, 300)) AS comment_string
+             FROM dots.NaFeatureComment nfc
+             GROUP BY nfc.na_feature_id
+          ) cmnt ON gf.na_feature_id = cmnt.na_feature_id
+          LEFT JOIN (
+             SELECT distinct drnf.na_feature_id as gene_na_feature_id, 1 as is_deprecated
+             FROM dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+             WHERE drnf.db_ref_id = dr.db_ref_id
+               AND dr.external_database_release_id = edr.external_database_release_id
+               AND edr.external_database_id = ed.external_database_id
+               AND ed.name = 'gassAWB_dbxref_gene2Deprecated_RSRC'
+          ) deprecated ON gf.na_feature_id = deprecated.gene_na_feature_id
+          LEFT JOIN (
+             SELECT gene_source_id, total_hts_snps, hts_nonsynonymous_snps, hts_stop_codon_snps,hts_noncoding_snps,hts_synonymous_snps,
+                    case when (hts_nonsynonymous_snps is null) then 0
+                         when (hts_synonymous_snps = 0) then 0
+                         else round ((hts_nonsynonymous_snps::numeric / hts_synonymous_snps), 2) end as hts_nonsyn_syn_ratio  -- cast avoids integer division
+             FROM (
+               select gene_source_id,
+                      count(*) as total_hts_snps,
+                      sum(has_nonsynonymous_allele) as hts_nonsynonymous_snps,
+                      sum(has_stop_codon) as hts_stop_codon_snps,
+                      sum(is_noncoding_snp) as hts_noncoding_snps,
+                      count(*) - sum(has_nonsynonymous_allele) - sum(has_stop_codon) - sum(is_noncoding_snp) as hts_synonymous_snps
+               FROM :ORG_ABBREVSnpAttributes
+               WHERE gene_source_id is not null
+               GROUP by gene_source_id
+             ) t
+          ) tothtssnps ON gf.source_id = tothtssnps.gene_source_id
+          LEFT JOIN (
+             SELECT ssg.sequence_id as gene_na_feature_id, sg.name
+             FROM dots.SequenceSequenceGroup ssg,
+                  dots.sequencegroup sg, core.tableinfo ti
+             WHERE ssg.sequence_group_id = sg.sequence_group_id
+               AND ssg.source_table_id = ti.table_id
+               AND ti.name = 'GeneFeature'
+          ) orthologs ON gf.na_feature_id = orthologs.gene_na_feature_id
+          LEFT JOIN (
+             SELECT na_feature_id, max(product) as product
+             FROM apidb.GeneFeatureProduct
+             WHERE is_preferred = 1
+             GROUP BY na_feature_id
+          ) preferred_gene_product ON gf.na_feature_id = preferred_gene_product.na_feature_id
+          LEFT JOIN (
+             SELECT na_feature_id, max(product) as product
+             FROM apidb.GeneFeatureProduct
+             GROUP BY na_feature_id
+          ) any_gene_product ON gf.na_feature_id = any_gene_product.na_feature_id
+          LEFT JOIN (
+             SELECT na_feature_id, max(name) as name
+             FROM apidb.GeneFeatureName
+             WHERE is_preferred = 1
+             GROUP BY na_feature_id
+             EXCEPT
+             -- suppress gene/name associations from the *DELETED_RSRC databases
+             SELECT gfn.na_feature_id, gfn.name
+             FROM apidb.GeneFeatureName gfn,
+                  sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+             WHERE gfn.external_database_release_id = edr.external_database_release_id
+               AND ed.external_database_id = edr.external_database_id
+               AND ed.name like '%DELETED_RSRC'
+          ) preferred_name ON gf.na_feature_id = preferred_name.na_feature_id
+          LEFT JOIN (
+             SELECT na_feature_id, max(name) as name
+             FROM apidb.GeneFeatureName
+             GROUP by na_feature_id
+             EXCEPT
+             -- suppress gene/name associations from the *DELETED_RSRC databases
+             SELECT gfn.na_feature_id, gfn.name
+             FROM apidb.GeneFeatureName gfn,
+                  sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+             WHERE gfn.external_database_release_id = edr.external_database_release_id
+               AND ed.external_database_id = edr.external_database_id
+               AND ed.name like '%DELETED_RSRC'
+          ) any_name ON gf.na_feature_id = any_name.na_feature_id
+          LEFT JOIN (
+             SELECT dbna.na_feature_id,
+                    substr(string_agg(db.primary_identifier, ',' order by db.primary_identifier), 1, 300) as entrez_id
+             FROM sres.ExternalDatabaseRelease edr, sres.DbRef db,
+                  dots.DbRefNaFeature dbna, sres.ExternalDatabase ed
+             WHERE edr.external_database_release_id = db.external_database_release_id
+               AND ed.external_database_id = edr.external_database_id
+               AND dbna.db_ref_id = db.db_ref_id
+               AND lower(ed.name) like '%entrez%'
+             GROUP BY dbna.na_feature_id
+          ) entrez_table ON gf.na_feature_id = entrez_table.na_feature_id
+          LEFT JOIN (
+             SELECT drnf.na_feature_id,
+                    substr(string_agg(dr.primary_identifier, ';' order by dr.primary_identifier), 1, 900) as old_ids
+             FROM dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+             WHERE dr.primary_identifier is not null
+               AND drnf.db_ref_id = dr.db_ref_id
+               AND dr.external_database_release_id = edr.external_database_release_id
+               AND edr.external_database_id = ed.external_database_id
+               AND edr.id_type = 'previous id'
+             GROUP BY drnf.na_feature_id
+          ) olds ON gf.na_feature_id = olds.na_feature_id
+          WHERE nl.is_top_level = 1
+            AND nl.feature_type = 'GeneFeature'
+            AND (gsa.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
+            AND species_name.name_class = 'scientific name'
+            AND (gf.is_predicted != 1 OR gf.is_predicted is null)
+            AND tn.name_class = 'scientific name'
+            AND tn.taxon_id NOT IN (SELECT o.taxon_id FROM apidb.Organism o WHERE o.is_annotated_genome=0)
+            AND tn.name not in ('Plasmodium gallinaceum','Plasmodium reichenowi')
+      ), transcript AS (  -- transcript-level attributes of transcripts with a top-level location
+          SELECT DISTINCT
+              t.parent_id as transcript_parent_id,
+              -- first the gene attributes:
+              cast(null as varchar(80)) as representative_transcript,
+              0 as gene_paralog_number, 0 as gene_ortholog_number,
+              transcript_uniprot.uniprot_id, transcript_uniprot.uniprot_id_internal,
+              -- next the transcript attributes:
+              t.source_id AS transcript_source_id,
+              tso.name as transcript_type,
+              t.na_feature_id,
+              CAST(coalesce(preferred_tx_product.product, any_tx_product.product, t.product)
+                   AS VARCHAR(300))
+                AS transcript_product,
+              tl.start_min, tl.end_max,
+              tl.is_reversed, --CHECK if needed
+              CASE coalesce(tl.is_reversed, 0) WHEN 0 THEN 'forward' WHEN 1 THEN 'reverse' ELSE tl.is_reversed::varchar END AS strand,
+              COALESCE(t.is_pseudo, 0) as is_pseudo,  -- a simple CASE never matches WHEN null, so the NULL default needs COALESCE
+              transcript_exons.exon_count,
+              sns.length AS length, sns.na_sequence_id as spliced_na_sequence_id,
+              rt1.anticodon rt1_anticodon,
+              utr_lengths.five_prime_utr_length,
+              utr_lengths.three_prime_utr_length
+          FROM dots.Transcript t
+          LEFT JOIN dots.RnaType rt1 ON t.na_feature_id = rt1.parent_id
+          LEFT JOIN dots.SplicedNaSequence sns ON t.na_sequence_id = sns.na_sequence_id
+          INNER JOIN sres.OntologyTerm tso ON t.sequence_ontology_id = tso.ontology_term_id
+          INNER JOIN apidb.TranscriptLocation tl ON tl.feature_source_id = t.source_id
+          INNER JOIN (
+             SELECT rna_feature_id, count(*) as exon_count
+             FROM dots.RnaFeatureExon
+             GROUP BY rna_feature_id
+          ) transcript_exons ON t.na_feature_id = transcript_exons.rna_feature_id
+          INNER JOIN (  -- summed top-level UTR lengths per transcript, NULL when a transcript has no UTR on that side
+             SELECT t.na_feature_id as transcript_na_feature_id, five_prime.utr_length as five_prime_utr_length, three_prime.utr_length as three_prime_utr_length
+             FROM dots.transcript t
+             LEFT JOIN (
+               SELECT parent_id, sum(end_max-start_min + 1) as utr_length FROM apidb.UtrLocation
+               WHERE direction = 5 AND is_top_level = 1
+               GROUP BY parent_id
+             ) five_prime ON t.na_feature_id = five_prime.parent_id
+             LEFT JOIN (
+               SELECT parent_id, sum(end_max-start_min + 1) as utr_length FROM apidb.UtrLocation
+               WHERE direction = 3 AND is_top_level = 1
+               GROUP BY parent_id
+             ) three_prime ON t.na_feature_id = three_prime.parent_id
+          ) utr_lengths ON t.na_feature_id = utr_lengths.transcript_na_feature_id
+          LEFT JOIN :ORG_ABBREVTranscriptUniprot transcript_uniprot ON t.na_feature_id = transcript_uniprot.na_feature_id
+          LEFT JOIN (
+             SELECT na_feature_id, max(product) as product
+             FROM apidb.TranscriptProduct
+             WHERE is_preferred = 1
+             GROUP BY na_feature_id
+          ) preferred_tx_product ON t.na_feature_id = preferred_tx_product.na_feature_id
+          LEFT JOIN (
+             SELECT na_feature_id, max(product) as product
+             FROM apidb.TranscriptProduct
+             GROUP BY na_feature_id
+          ) any_tx_product ON t.na_feature_id = any_tx_product.na_feature_id
+          WHERE
+            tl.is_top_level=1
+        )
+        SELECT DISTINCT  -- joins each genefeat row to its transcripts and to the rank 1 protein of each transcript
+            genefeat.project_id,
+            transcript.transcript_source_id AS source_id,
+            -- first the gene attributes:
+            genefeat.gene_source_id,
+            genefeat.gene_na_feature_id,
+            genefeat.gene_start_min,
+            genefeat.gene_end_max,
+            genefeat.gene_name,
+            CAST(COALESCE(genefeat.old_gene_product, transcript.transcript_product,  -- gene product first, then transcript product, then a default
+                          case when transcript.is_pseudo = 1
+                               then 'pseudogene'
+                               else 'unspecified product'
+                          end
+                         ) as VARCHAR(300)
+                ) as old_gene_product,
+            genefeat.gene_product,
+            genefeat.gene_type,
+            genefeat.gene_ebi_biotype,
+            genefeat.gene_id,
+            genefeat.gene_transcript_count,
+            genefeat.gene_exon_count,
+            cast(null as varchar(80)) as representative_transcript,  -- placeholder, set by the UPDATE statements later in this file
+            genefeat.gene_previous_ids,
+            genefeat.is_deprecated,
+            0 as gene_paralog_number, 0 as gene_ortholog_number,  -- placeholders, set by an UPDATE later in this file
+            genefeat.gene_context_start,
+            genefeat.gene_context_end,
+            genefeat.gene_zoom_context_start,
+            genefeat.gene_zoom_context_end,
+            genefeat.orthomcl_name,
+            genefeat.gene_total_hts_snps,
+            genefeat.gene_hts_nonsynonymous_snps,
+            genefeat.gene_hts_stop_codon_snps,
+            genefeat.gene_hts_noncoding_snps,
+            genefeat.gene_hts_synonymous_snps,
+            genefeat.gene_hts_nonsyn_syn_ratio,
+            genefeat.comment_string,
+            transcript.uniprot_id, transcript.uniprot_id_internal,
+            genefeat.gene_entrez_id,
+            genefeat.gene_locations,
+            -- next the transcript attributes:
+            transcript.transcript_source_id,
+            transcript.transcript_type,
+            transcript.na_feature_id,
+            CAST(coalesce(transcript.transcript_product, genefeat.old_gene_product,  -- transcript product first, then gene product, then a default
+                          case when transcript.is_pseudo = 1
+                               then 'pseudogene'
+                               else 'unspecified product'
+                          end
+                         )AS VARCHAR(300)
+                ) as transcript_product,
+            transcript.start_min, transcript.end_max,
+            transcript.is_reversed, --CHECK if needed
+            transcript.strand,
+            transcript.is_pseudo,
+            transcript.exon_count,
+            transcript.length,
+            transcript.spliced_na_sequence_id,
+            genefeat.sequence_id,
+            genefeat.organism,
+            genefeat.species,
+            genefeat.strain,
+            genefeat.ncbi_tax_id, genefeat.taxon_id,
+            genefeat.so_id,
+            genefeat.so_term_name,
+            genefeat.so_term_definition,
+            genefeat.so_version,
+            CAST(coalesce(rt1_anticodon, rt2_anticodon)AS VARCHAR(3)) AS anticodon,  -- transcript-level anticodon wins over the gene-level one
+            genefeat.external_db_name,
+            genefeat.external_db_version,
+            genefeat.external_db_rls_id,
+            genefeat.chromosome,
+            genefeat.sequence_type,
+            genefeat.chromosome_order_num, genefeat.na_sequence_id,
+            --next the protein attributes:
+            pa.source_id AS protein_source_id,
+            pa.aa_sequence_id,
+            pa.cds_start as coding_start,
+            pa.cds_end as coding_end,
+            pa.cds_length,
+            pa.protein_length,
+            pa.has_seqedit,
+            pa.tm_count,
+            pa.molecular_weight,
+            pa.isoelectric_point,
+            pa.signalp_peptide,
+            pa.ec_numbers, pa.ec_numbers_derived,
+            pa.annotated_go_component,
+            pa.annotated_go_function,
+            pa.annotated_go_process,
+            pa.predicted_go_component,
+            pa.predicted_go_function,
+            pa.predicted_go_process,
+            pa.annotated_go_id_component,
+            pa.annotated_go_id_function,
+            pa.annotated_go_id_process,
+            pa.predicted_go_id_component,
+            pa.predicted_go_id_function,
+            pa.predicted_go_id_process,
+            transcript.five_prime_utr_length,
+            transcript.three_prime_utr_length
+        FROM genefeat
+        INNER JOIN transcript ON genefeat. gene_na_feature_id = transcript.transcript_parent_id
+        LEFT JOIN :ORG_ABBREVproteinattributes pa ON transcript.transcript_source_id = pa.transcript_source_id AND pa.rank_in_transcript = 1
+        ORDER BY taxon_id, source_id
+
+
+:DECLARE_PARTITION;
+
+
+
+      UPDATE :ORG_ABBREVTranscriptAttributes ta  -- TriTrypDB rows only, exon count becomes 1 plus the number of introns longer than 10
+      SET exon_count = (SELECT count(*) + 1 FROM apidb.IntronLocation il WHERE il.parent_id = ta.na_feature_id AND il.end_max - il.start_min + 1 > 10 )
+      WHERE ta.project_id = 'TriTrypDB'
+
+      ;
+
+
+
+      UPDATE :ORG_ABBREVTranscriptAttributes gaup  -- fills the two counts that the CTAS left at 0, protein coding genes only
+      SET gene_paralog_number = (  -- other genes of the same organism that share the orthomcl_name
+              SELECT count(distinct gene_source_id)
+              FROM :ORG_ABBREVTranscriptAttributes g1
+              WHERE g1.orthomcl_name = gaup.orthomcl_name
+                AND g1.organism = gaup.organism
+                AND gaup.gene_source_id != g1.gene_source_id
+            ),
+          gene_ortholog_number = (  -- genes of other organisms that share the orthomcl_name
+              SELECT count(distinct gene_source_id)
+              FROM :ORG_ABBREVTranscriptAttributes g1
+              WHERE g1.orthomcl_name = gaup.orthomcl_name
+                AND g1.organism != gaup.organism
+            )
+      WHERE (gaup.gene_type = 'protein coding' or gaup.gene_type = 'protein coding gene')
+
+      ;
+
+
+
+      UPDATE :ORG_ABBREVTranscriptAttributes  -- synthesizes a gene_id above the largest dots.gene gene_id for rows that have none
+      SET gene_id = gene_na_feature_id + (select coalesce(max(gene_id), 0) from dots.gene)
+      WHERE gene_id is null
+
+      ;
+
+
+
+      UPDATE :ORG_ABBREVTranscriptAttributes
+      SET representative_transcript = (  -- smallest source_id among the rows of the same gene
+              select min(source_id)
+              from :ORG_ABBREVTranscriptAttributes ga
+              where ga.gene_source_id = :ORG_ABBREVTranscriptAttributes .gene_source_id
+            )
+      WHERE representative_transcript is null
+        AND gene_id is not null
+
+      ;
+
+
+
+      UPDATE :ORG_ABBREVTranscriptAttributes
+      SET representative_transcript = source_id  -- fallback for rows the previous statement left unset
+      WHERE representative_transcript is null
+
+      ;
+
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
new file mode 100644
index 0000000000..bcb9c3e57f
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
@@ -0,0 +1,135 @@
+
+
+      CREATE UNIQUE INDEX TranscriptAttr_sourceId  -- source_id alone is unique, and every UNIQUE index in this file includes it
+        ON :ORG_ABBREVTranscriptAttributes (source_id)
+
+
+      ;
+
+
+
+      CREATE UNIQUE INDEX TranscriptAttr_srcPrj
+        ON :ORG_ABBREVTranscriptAttributes (source_id, gene_source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE UNIQUE INDEX TranscriptAttr_genesrc
+        ON :ORG_ABBREVTranscriptAttributes (gene_source_id, source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE UNIQUE INDEX TranscriptAttr_exon_ix
+        ON :ORG_ABBREVTranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE UNIQUE INDEX TranscriptAttr_loc_ix  -- gene-level location (gene_start_min, gene_end_max)
+        ON :ORG_ABBREVTranscriptAttributes
+           (na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id,
+            is_deprecated, source_id, gene_source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE UNIQUE INDEX TranscriptAttr_feat_ix
+        ON :ORG_ABBREVTranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE UNIQUE INDEX TranscriptAttr_geneid_ix
+        ON :ORG_ABBREVTranscriptAttributes (gene_id, source_id, gene_source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE UNIQUE INDEX TransAttr_orthoname_ix
+        ON :ORG_ABBREVTranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE UNIQUE INDEX TransAttr_molwt_ix
+        ON :ORG_ABBREVTranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE INDEX TransAttr_ortholog_ix
+        ON :ORG_ABBREVTranscriptAttributes
+           (source_id, na_sequence_id, gene_start_min, gene_end_max, orthomcl_name, gene_source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE INDEX TransAttr_orgsrc_ix
+        ON :ORG_ABBREVTranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max)
+
+
+      ;
+
+
+
+      CREATE INDEX TransAttr_lwrsrc_ix  -- expression index for case-insensitive source_id lookups
+        ON :ORG_ABBREVTranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id)
+
+
+      ;
+
+
+
+      CREATE INDEX TransAttr_species_ix
+        ON :ORG_ABBREVTranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE UNIQUE INDEX TrnscrptAttr_geneinfo
+        ON :ORG_ABBREVTranscriptAttributes
+           (gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id,
+            protein_source_id, na_sequence_id, length, protein_length,
+            five_prime_utr_length, three_prime_utr_length)
+
+
+      ;
+
+
+
+      CREATE UNIQUE INDEX TranscriptAttr_genenaf
+        ON :ORG_ABBREVTranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id)
+
+
+      ;
+
+
+
+      CREATE INDEX TransAttr_locsIds_ix  -- transcript-level location (start_min, end_max)
+        ON :ORG_ABBREVTranscriptAttributes
+           (na_sequence_id, start_min, end_max, is_reversed, gene_source_id, source_id, project_id)
+
+
+      ;
+
diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql
new file mode 100644
index 0000000000..9715d5f08b
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql
@@ -0,0 +1,19 @@
+:CREATE_AND_POPULATE
+
+
+      CREATE TABLE TranscriptCenDistance AS
+      -- gap between each top-level transcript and each centromere feature on the same sequence
+      SELECT DISTINCT
+             tl.feature_source_id AS transcript,
+             LEAST(ABS(mfl.start_min - tl.end_max),
+                   ABS(mfl.end_max - tl.start_min)) AS centromere_distance,
+             tl.sequence_source_id AS genomic_sequence
+      FROM apidb.TranscriptLocation tl
+      INNER JOIN apidb.FeatureLocation mfl ON tl.na_sequence_id = mfl.na_sequence_id
+      INNER JOIN sres.OntologyTerm so ON mfl.sequence_ontology_id = so.ontology_term_id
+      WHERE mfl.feature_type = 'Miscellaneous' AND so.name = 'centromere'
+        AND tl.is_top_level = 1
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
new file mode 100644
index 0000000000..e6630dae0d
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
@@ -0,0 +1,8 @@
+
+
+      create index GCent_loc_ix  -- supports lookups by sequence, then by centromere distance
+        on TranscriptCenDistance (genomic_sequence, centromere_distance)
+
+
+      ;
+
diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway.psql b/Model/lib/psql/webtables/MO/TranscriptPathway.psql
new file mode 100644
index 0000000000..4b1c95b668
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TranscriptPathway.psql
@@ -0,0 +1,112 @@
+:CREATE_AND_POPULATE
+
+
+      CREATE TABLE TranscriptPathway (  -- created empty, rows are inserted by the DO block later in this file
+        SOURCE_ID                    VARCHAR(80),
+        GENE_SOURCE_ID               VARCHAR(80),
+        PROJECT_ID                   VARCHAR(20),
+        PATHWAY_SOURCE_ID            VARCHAR(50),
+        PATHWAY_NAME                 VARCHAR(150),
+        EC_NUMBER_GENE               VARCHAR(16),
+        WILDCARD_COUNT_GENE          NUMERIC,
+        EC_NUMBER_PATHWAY            VARCHAR(16),
+        WILDCARD_COUNT_PATHWAY       NUMERIC,
+        EXACT_MATCH                  NUMERIC,  -- 1 or 0 flag
+        COMPLETE_EC                  NUMERIC,  -- 1 or 0 flag
+        PATHWAY_ID                   NUMERIC(12,0),
+        PATHWAY_SOURCE               VARCHAR(200),
+        EXTERNAL_DATABASE_RELEASE_ID NUMERIC(10,0)
+      )
+
+
+:DECLARE_PARTITION;
+
+
+
+      DO $$
+      DECLARE
+        idlist RECORD;
+      BEGIN
+        FOR idlist IN ( SELECT DISTINCT organism FROM GeneAttributes )  -- one INSERT and commit per organism
+        LOOP
+          INSERT INTO TranscriptPathway
+          WITH transcript_ec AS (
+            SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
+              -- number of '-' wildcard positions in the EC number, for example '1.2.-.-' gives 2
+              -- (portable stand-in for regexp_count(ec.ec_number, '-'))
+                   length(ec.ec_number) - length(replace(ec.ec_number, '-', '')) as wildcard_count
+            FROM sres.EnzymeClass ec
+            WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM dots.AaSequenceEnzymeClass)
+            -- no GROUP BY, the wildcard count is a per-row expression rather than an aggregate
+          ),
+          pathway_node_ec AS (
+            SELECT distinct pn.pathway_id, pn.row_id as enzyme_class_id
+            FROM sres.PathwayNode pn, sres.ontologyterm ot
+            WHERE pn.pathway_node_type_id = ot.ontology_term_id
+              AND ot.name = 'enzyme'
+              AND pn.display_label != '-.-.-.-'
+          ),
+          pathway_ec AS (
+            SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
+              -- number of '-' wildcard positions in the EC number, for example '1.2.-.-' gives 2
+              -- (portable stand-in for regexp_count(ec.ec_number, '-'))
+                   length(ec.ec_number) - length(replace(ec.ec_number, '-', '')) as wildcard_count
+            FROM sres.EnzymeClass ec
+            WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM pathway_node_ec)
+            -- no GROUP BY, the wildcard count is a per-row expression rather than an aggregate
+          ),
+          ec_match AS (  -- pairs of EC classes whose four levels agree, a NULL level on either side matches anything
+            SELECT tec.enzyme_class_id as transcript_enzyme_class_id,
+                   pec.enzyme_class_id as pathway_enzyme_class_id,
+                   tec.wildcard_count as wildcard_count_transcript,
+                   pec.wildcard_count as wildcard_count_pathway,
+                   tec.ec_number as ec_number_transcript,
+                   pec.ec_number as ec_number_pathway
+            FROM transcript_ec tec, pathway_ec pec
+            WHERE (tec.ec_number_1 = pec.ec_number_1 or tec.ec_number_1 is null or pec.ec_number_1 is null)
+              AND (tec.ec_number_2 = pec.ec_number_2 or tec.ec_number_2 is null or pec.ec_number_2 is null)
+              AND (tec.ec_number_3 = pec.ec_number_3 or tec.ec_number_3 is null or pec.ec_number_3 is null)
+              AND (tec.ec_number_4 = pec.ec_number_4 or tec.ec_number_4 is null or pec.ec_number_4 is null)
+          )
+          SELECT DISTINCT ga.source_id
+            , ga.gene_source_id
+            , ga.project_id
+            , pa.source_id as pathway_source_id
+            , pa.name as pathway_name
+            , ec_match.ec_number_transcript as ec_number_gene
+            , ec_match.wildcard_count_transcript as wildcard_count_gene
+            , ec_match.ec_number_pathway
+            , ec_match.wildcard_count_pathway
+            , CASE WHEN ec_match.ec_number_pathway = ec_match.ec_number_transcript
+                THEN 1
+                ELSE 0 END as exact_match
+            , CASE WHEN ec_match.wildcard_count_pathway + ec_match.wildcard_count_transcript = 0
+                THEN 1
+                ELSE 0 END as complete_ec
+            , pa.pathway_id
+            , pa.pathway_source
+            , p.external_database_release_id
+          FROM PathwayAttributes pa
+            , sres.pathway p
+            , pathway_node_ec pec
+            , ec_match
+            , dots.AaSequenceEnzymeClass asec
+            , TranscriptAttributes ga
+          WHERE ga.organism = idlist.organism
+            AND pa.pathway_id = pec.pathway_id
+            AND p.pathway_id = pa.pathway_id
+            AND pec.enzyme_class_id = ec_match.pathway_enzyme_class_id
+            AND asec.enzyme_class_id = ec_match.transcript_enzyme_class_id
+            AND ga.aa_sequence_id = asec.aa_sequence_id
+            AND (
+              (ga.orthomcl_name IS NULL AND asec.evidence_code != 'OrthoMCLDerived')
+              OR ga.orthomcl_name IS NOT NULL
+            )
+          ;
+          commit;
+        END LOOP;
+      END;
+      $$ LANGUAGE PLPGSQL;
+
+      ;
+
diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
new file mode 100644
index 0000000000..7587695697
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
@@ -0,0 +1,18 @@
+
+
+ -- Wide index on TranscriptPathway led by gene_source_id and source_id.
+ CREATE INDEX TranscriptPath_ix
+ ON TranscriptPathway (
+ gene_source_id, source_id, pathway_source_id, pathway_name, pathway_id,
+ ec_number_gene, wildcard_count_pathway, ec_number_pathway, pathway_source
+ )
+
+ ;
+
+
+ -- Narrow index on TranscriptPathway led by pathway_source.
+ CREATE INDEX TranscriptPathSource_ix
+ ON TranscriptPathway (pathway_source, gene_source_id, source_id)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence.psql b/Model/lib/psql/webtables/MO/TranscriptSequence.psql
new file mode 100644
index 0000000000..0cc12e2ff2
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TranscriptSequence.psql
@@ -0,0 +1,11 @@
+:CREATE_AND_POPULATE
+
+
+ CREATE TABLE :ORG_ABBREVTranscriptSequence AS -- transcript ids with their spliced NA sequence
+ SELECT attrs.source_id, attrs.project_id, spliced.sequence
+ FROM :ORG_ABBREVTranscriptAttributes attrs
+ INNER JOIN dots.SplicedNaSequence spliced ON spliced.source_id = attrs.source_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
new file mode 100644
index 0000000000..079e8faf28
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
@@ -0,0 +1,7 @@
+
+
+ -- Index on (source_id, project_id) for the per-organism TranscriptSequence table.
+ CREATE INDEX XScriptSeq_ix
+ ON :ORG_ABBREVTranscriptSequence (source_id, project_id)
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql
new file mode 100644
index 0000000000..7f76786eb0
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql
@@ -0,0 +1,27 @@
+:CREATE_AND_POPULATE
+
+
+ CREATE TABLE TransmembraneDomains AS -- transmembrane features with location, transcript and protein ids
+ SELECT ta.source_id AS transcript_source_id
+ , ta.gene_source_id AS gene_source_id
+ , ta.project_id
+ , tmf.topology AS tmf_topology
+ , aal.start_min AS tmf_start_min
+ , aal.end_max AS tmf_end_max
+ , substr(tas.sequence, aal.start_min::INTEGER, aal.end_max::INTEGER - aal.start_min::INTEGER + 1) AS tmf_sequence -- substr(string, start, count)
+ , tmf.aa_feature_id AS tmf_aa_feature_id
+ , tmf.aa_sequence_id AS tmf_aa_sequence_id
+ , tas.source_id AS protein_source_id
+ FROM dots.aalocation aal
+ , transcriptattributes ta
+ , dots.translatedaafeature taf
+ , dots.translatedaasequence tas
+ , dots.transmembraneaafeature tmf
+ WHERE ta.na_feature_id = taf.na_feature_id
+ AND taf.aa_sequence_id = tas.aa_sequence_id
+ AND tas.aa_sequence_id = tmf.aa_sequence_id
+ AND tmf.aa_feature_id = aal.aa_feature_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
new file mode 100644
index 0000000000..bdefeef42d
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
@@ -0,0 +1,8 @@
+
+
+ -- Index on TransmembraneDomains led by the amino-acid sequence and feature ids.
+ CREATE INDEX TransDom1_ix
+ ON TransmembraneDomains (
+ tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology)
+ ;
+
From 0038dbd8ab6f98e76f6fa847e5dfdae2d9726510 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 9 May 2025 15:11:53 -0400
Subject: [PATCH 002/112] get MG to conform
---
.../psql/webtables/MG/CompoundAttributes.psql | 13 +-
Model/lib/psql/webtables/MG/CompoundId.psql | 23 ++--
.../psql/webtables/MG/CompoundProperties.psql | 7 +-
.../psql/webtables/MG/CompoundTypeAheads.psql | 9 +-
.../webtables/MG/GroupDomainAttribute.psql | 14 +--
.../lib/psql/webtables/MG/OntologyLevels.psql | 37 ++----
.../psql/webtables/MG/PathwayAttributes.psql | 21 +---
.../psql/webtables/MG/PathwayCompounds.psql | 13 +-
Model/lib/psql/webtables/MG/PathwayNodes.psql | 111 ++++++++----------
.../psql/webtables/MG/PathwayReactions.psql | 23 ++--
10 files changed, 97 insertions(+), 174 deletions(-)
diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes.psql b/Model/lib/psql/webtables/MG/CompoundAttributes.psql
index 3f69995d58..ea8207e077 100644
--- a/Model/lib/psql/webtables/MG/CompoundAttributes.psql
+++ b/Model/lib/psql/webtables/MG/CompoundAttributes.psql
@@ -1,6 +1,4 @@
-
-
- CREATE TABLE :ORG_ABBREVCompoundAttributes AS
+ CREATE TABLE :SCHEMA.CompoundAttributes AS
SELECT p.ID
, p.source_id
, p.compound_name
@@ -16,13 +14,8 @@
WHERE p.parent_id IS NULL
AND ( p.ID = childc.parent_id OR p.ID = childc.ID )
GROUP BY p.ID, p.source_id, p.compound_name, p.definition, p.secondary_ids
-
- ;
-
-
-
- CREATE INDEX :ORG_ABBREVCompoundAttributes_idx ON :ORG_ABBREVCompoundAttributes (source_id)
+ ;
-
+ CREATE INDEX CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id)
;
diff --git a/Model/lib/psql/webtables/MG/CompoundId.psql b/Model/lib/psql/webtables/MG/CompoundId.psql
index 64eafbd86a..e235b32106 100644
--- a/Model/lib/psql/webtables/MG/CompoundId.psql
+++ b/Model/lib/psql/webtables/MG/CompoundId.psql
@@ -1,15 +1,13 @@
-
-
- CREATE TABLE :ORG_ABBREVCompoundId AS
+ CREATE TABLE :SCHEMA.CompoundId AS
SELECT source_id AS id, source_id AS compound, 'same ID' AS type, '' as source
- FROM :ORG_ABBREVCompoundAttributes
+ FROM :SCHEMA.CompoundAttributes
UNION
SELECT p.source_id AS id, ca.source_id AS compound, 'child ID' AS type, '' as source
- FROM :ORG_ABBREVCompoundAttributes ca, CompoundProperties p
+ FROM :SCHEMA.CompoundAttributes ca, :SCHEMA.CompoundProperties p
WHERE ca.id = p.parent_id
UNION
SELECT da.accession_number AS id, p.source_id AS compound, 'KEGG' AS type, '' as source
- FROM chebi.database_accession da, :ORG_ABBREVCompoundAttributes p
+ FROM chebi.database_accession da, :SCHEMA.CompoundAttributes p
WHERE da.type='KEGG COMPOUND accession'
AND da.compound_id = p.id
UNION
@@ -19,21 +17,16 @@
AND da.compound_id = c.id AND c.parent_id=p.id
UNION
SELECT n.name as id, ca.source_id as compound, 'name' as type, n.source
- FROM :ORG_ABBREVCompoundAttributes ca, chebi.names n
+ FROM :SCHEMA.CompoundAttributes ca, chebi.names n
WHERE ca.id = n.compound_id
AND n.type = 'NAME'
UNION
SELECT n.name as id, ca.source_id as compound, 'synonym' as type, n.source
- FROM :ORG_ABBREVCompoundAttributes ca, chebi.names n
+ FROM :SCHEMA.CompoundAttributes ca, chebi.names n
WHERE ca.id = n.compound_id
- AND n.type = 'SYNONYM'
-
+ AND n.type = 'SYNONYM'
;
-
-
-
- CREATE INDEX :ORG_ABBREVCompoundId_idx ON :ORG_ABBREVCompoundId (id, compound)
-
+ CREATE INDEX CompoundId_idx ON :SCHEMA.CompoundId (id, compound)
;
diff --git a/Model/lib/psql/webtables/MG/CompoundProperties.psql b/Model/lib/psql/webtables/MG/CompoundProperties.psql
index 7bea00b811..5faddf835f 100644
--- a/Model/lib/psql/webtables/MG/CompoundProperties.psql
+++ b/Model/lib/psql/webtables/MG/CompoundProperties.psql
@@ -1,6 +1,4 @@
-
-
- CREATE TABLE CompoundProperties AS
+ CREATE TABLE :SCHEMA.CompoundProperties AS
SELECT c.ID, c.chebi_accession AS source_id, c.parent_id,
c.name AS compound_name,
substr(string_agg(cn.name, ';'), 1, 1000) AS other_names,
@@ -23,7 +21,6 @@
LEFT JOIN ( SELECT parent_id, chebi_accession FROM chebi.compounds)
sec ON c.ID = sec.parent_id
WHERE NOT c.status in ('D', 'F')
- GROUP BY c.ID, c.chebi_accession, c.parent_id, c.name, c.definition, m.mass
-
+ GROUP BY c.ID, c.chebi_accession, c.parent_id, c.name, c.definition, m.mass
;
diff --git a/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql b/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql
index 244b989c5c..dfefb18f98 100644
--- a/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql
+++ b/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql
@@ -1,15 +1,12 @@
-
-
- CREATE TABLE CompoundTypeAheads AS
+ CREATE TABLE :SCHEMA.CompoundTypeAheads AS
SELECT ca.source_id AS compound_id,
ca.source_id || ' (' || ca.compound_name || ')' AS display
- FROM CompoundAttributes ca, PathwayCompounds pc
+ FROM :SCHEMA.CompoundAttributes ca, :SCHEMA.PathwayCompounds pc
WHERE pc.chebi_accession = ca.source_id
UNION
SELECT ca.source_id AS compound_id,
pc.compound_source_id || ' (' || ca.compound_name || ')' AS display
- FROM CompoundAttributes ca, PathwayCompounds pc
+ FROM :SCHEMA.CompoundAttributes ca, :SCHEMA.PathwayCompounds pc
WHERE pc.chebi_accession = ca.source_id
-
;
diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
index 766215dcc9..b023dc3669 100644
--- a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
+++ b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
@@ -1,6 +1,4 @@
-
-
-CREATE TABLE GroupDomainAttribute AS
+CREATE TABLE :SCHEMA.GroupDomainAttribute AS
(
SELECT og.group_id AS group_name, ag.descriptions
FROM apidb.OrthologGroup og,
@@ -10,7 +8,7 @@ FROM apidb.OrthologGroup og,
FROM (SELECT group_name, accession, num_proteins,
rank() OVER (PARTITION BY group_name ORDER BY num_proteins DESC) rnk
FROM (SELECT group_name, accession, count(distinct full_id) AS num_proteins
- FROM DomainAssignment
+ FROM :SCHEMA.DomainAssignment
GROUP BY group_name,accession
)
)
@@ -21,12 +19,8 @@ FROM apidb.OrthologGroup og,
) ag
WHERE og.group_id = ag.group_name
)
+;
- ;
-
-
-
-CREATE INDEX GroupDomainAttribute_idx ON GroupDomainAttribute (group_name)
-
+CREATE INDEX GroupDomainAttribute_idx ON :SCHEMA.GroupDomainAttribute (group_name)
;
diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webtables/MG/OntologyLevels.psql
index a117a4a74d..417c6657aa 100644
--- a/Model/lib/psql/webtables/MG/OntologyLevels.psql
+++ b/Model/lib/psql/webtables/MG/OntologyLevels.psql
@@ -1,48 +1,35 @@
-
-
- CREATE UNLOGGED TABLE :ORG_ABBREVIs_a_links AS
+ CREATE UNLOGGED TABLE :SCHEMA.Is_a_links AS
SELECT subject_term_id, object_term_id
FROM sres.OntologyRelationship rel, sres.OntologyTerm pred
WHERE rel.predicate_term_id = pred.ontology_term_id
AND pred.name = 'is_a'
-
;
-
-
- CREATE UNLOGGED TABLE :ORG_ABBREVRoots AS
- SELECT object_term_id FROM :ORG_ABBREVis_a_links
+ CREATE UNLOGGED TABLE :SCHEMA.Roots AS
+ SELECT object_term_id FROM :SCHEMA.is_a_links
EXCEPT
- SELECT subject_term_id FROM :ORG_ABBREVis_a_links
-
+ SELECT subject_term_id FROM :SCHEMA.is_a_links
;
-
-
- CREATE TABLE :ORG_ABBREVOntologyLevels as
+ CREATE TABLE :SCHEMA.OntologyLevels as
WITH RECURSIVE levels(ontology_term_id, depth) AS (
- SELECT object_term_id, 1 as depth FROM :ORG_ABBREVRoots
+ SELECT object_term_id, 1 as depth FROM :SCHEMA.Roots
UNION
- SELECT :ORG_ABBREVis_a_links.subject_term_id, levels.depth + 1 as depth
- FROM :ORG_ABBREVIs_a_links, levels
- WHERE :ORG_ABBREVis_a_links.object_term_id = levels.ontology_term_id
+ SELECT :SCHEMA.is_a_links.subject_term_id, levels.depth + 1 as depth
+ FROM :SCHEMA.Is_a_links, levels
+ WHERE :SCHEMA.is_a_links.object_term_id = levels.ontology_term_id
)
SELECT ontology_term_id, min(depth) as min_depth, max(depth) as max_depth
FROM (
SELECT ontology_term_id, depth
FROM levels
- WHERE ontology_term_id NOT IN (SELECT object_term_id FROM :ORG_ABBREVRoots)
+ WHERE ontology_term_id NOT IN (SELECT object_term_id FROM :SCHEMA.Roots)
UNION
- SELECT object_term_id, 0 FROM :ORG_ABBREVRoots
+ SELECT object_term_id, 0 FROM :SCHEMA.Roots
) t
GROUP BY ontology_term_id
-
;
-
-
- create index olev_termix on :ORG_ABBREVOntologyLevels (ontology_term_id, min_depth, max_depth)
-
-
+ create index olev_termix on :SCHEMA.OntologyLevels (ontology_term_id, min_depth, max_depth)
;
diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes.psql b/Model/lib/psql/webtables/MG/PathwayAttributes.psql
index 445fd4bffb..12720d8b73 100644
--- a/Model/lib/psql/webtables/MG/PathwayAttributes.psql
+++ b/Model/lib/psql/webtables/MG/PathwayAttributes.psql
@@ -1,6 +1,4 @@
-
-
- CREATE TABLE PathwayAttributes as
+ CREATE TABLE :SCHEMA.PathwayAttributes as
SELECT
p.source_id
, p.pathway_id
@@ -47,22 +45,13 @@
AND source_id NOT IN('ec01100', 'ec01110', 'ec01120')
-- temporarily remove MPMP from release 46
AND ed.name NOT LIKE '%MPMP%'
-
;
-
-
- CREATE UNIQUE INDEX PathAttr_sourceId_pwaySrc
- ON :ORG_ABBREVPathwayAttributes (source_id, pathway_source)
-
-
+ CREATE UNIQUE INDEX PathAttr_sourceId_pwaySrc
+ ON :SCHEMA.PathwayAttributes (source_id, pathway_source)
;
-
-
- create index PathAttr_ix
- on PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count)
-
-
+ create index PathAttr_ix
+ on :SCHEMA.PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count)
;
diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds.psql b/Model/lib/psql/webtables/MG/PathwayCompounds.psql
index 1ca74c7a6c..a30b299c31 100644
--- a/Model/lib/psql/webtables/MG/PathwayCompounds.psql
+++ b/Model/lib/psql/webtables/MG/PathwayCompounds.psql
@@ -1,6 +1,4 @@
-
-
- CREATE TABLE PathwayCompounds AS
+ CREATE TABLE :SCHEMA.PathwayCompounds AS
SELECT
pathway_id
, reaction_id
@@ -78,14 +76,9 @@
AND p.EXTERNAL_DATABASE_RELEASE_ID = edr.EXTERNAL_DATABASE_RELEASE_ID
AND edr.EXTERNAL_DATABASE_ID = ed.EXTERNAL_DATABASE_ID
) t2 LEFT OUTER JOIN CHEBI.COMPOUNDS c on t2.row_id = c.ID
-
;
-
-
- create index PthCmpd_id_ix
- on PathwayCompounds (pathway_id, reaction_id, ext_db_name)
-
-
+ create index PthCmpd_id_ix
+ on :SCHEMA.PathwayCompounds (pathway_id, reaction_id, ext_db_name)
;
diff --git a/Model/lib/psql/webtables/MG/PathwayNodes.psql b/Model/lib/psql/webtables/MG/PathwayNodes.psql
index 82ca920c30..7b8740ca5b 100644
--- a/Model/lib/psql/webtables/MG/PathwayNodes.psql
+++ b/Model/lib/psql/webtables/MG/PathwayNodes.psql
@@ -1,6 +1,4 @@
-
-
- CREATE UNLOGGED TABLE NodesWithTypes AS
+ CREATE UNLOGGED TABLE :SCHEMA.NodesWithTypes AS
SELECT pn.pathway_id
, CASE WHEN pa.name IS NOT NULL THEN pa.name ELSE pn.display_label END AS display_label
, pa.url
@@ -17,7 +15,7 @@
, NULL AS default_structure
FROM sres.pathwaynode pn
INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
- LEFT JOIN PathwayAttributes pa ON pn.display_label = pa.source_id
+ LEFT JOIN :SCHEMA.PathwayAttributes pa ON pn.display_label = pa.source_id
WHERE ot.name = 'metabolic process'
UNION ALL
SELECT pn.pathway_id
@@ -37,7 +35,7 @@
FROM sres.pathwaynode pn
INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
LEFT JOIN sres.enzymeclass ec ON pn.row_id = ec.enzyme_class_id
- LEFT JOIN PathwayNodeGene tp ON pn.pathway_node_id = tp.pathway_node_id
+ LEFT JOIN :SCHEMA.PathwayNodeGene tp ON pn.pathway_node_id = tp.pathway_node_id
WHERE ot.name = 'enzyme'
GROUP BY pn.pathway_id
, pn.display_label
@@ -83,26 +81,20 @@
AND s.default_structure = 'Y'
) st ON c.chebi_accession = st.compound
WHERE ot.name = 'molecular entity'
-
;
-
-
- CREATE UNLOGGED TABLE ReactionsWithReversibility AS
+ CREATE UNLOGGED TABLE :SCHEMA.ReactionsWithReversibility AS
SELECT DISTINCT spr.pathway_relationship_id
, tpr.is_reversible
, tpr.reaction_source_id
FROM sres.pathwayrelationship spr
, apidb.pathwayreactionrel prr
- , PathwayReactions tpr
+ , :SCHEMA.PathwayReactions tpr
WHERE prr.pathway_relationship_id = spr.pathway_relationship_id
AND tpr.reaction_id = prr.pathway_reaction_id
-
- ;
+ ;
-
-
- CREATE UNLOGGED TABLE EnzymeEdges AS
+ CREATE UNLOGGED TABLE :SCHEMA.EnzymeEdges AS
SELECT DISTINCT nwt.pathway_id AS pathway_id
, nwt.pathway_node_id AS e_id
, nwt.type
@@ -113,9 +105,9 @@
, i.associated_node_id || '_' || o.node_id || '_' || rri.is_reversible || '_' || rro.is_reversible as io
FROM sres.pathwayrelationship i
, sres.pathwayrelationship o
- , NodesWithTypes nwt
- , ReactionsWithReversibility rri
- , ReactionsWithReversibility rro
+ , :SCHEMA.NodesWithTypes nwt
+ , :SCHEMA.ReactionsWithReversibility rri
+ , :SCHEMA.ReactionsWithReversibility rro
WHERE i.node_id = nwt.pathway_node_id
AND o.associated_node_id = nwt.pathway_node_id
AND i.pathway_relationship_id = rri.pathway_relationship_id
@@ -123,12 +115,9 @@
AND nwt.type = 'enzyme'
AND rri.is_reversible = rro.is_reversible
AND rri.reaction_source_id = rro.reaction_source_id
-
- ;
-
+ ;
-
- CREATE UNLOGGED TABLE ParentNodes AS
+ CREATE UNLOGGED TABLE :SCHEMA.ParentNodes AS
WITH AllEnzymeEdges AS (
SELECT string_agg(io, ',' ORDER BY io) AS all_edges
, e_id
@@ -141,7 +130,7 @@
SELECT pathway_id
, all_edges
, string_agg(e_id::varchar, '_' ORDER BY e_id) AS parent
- FROM AllEnzymeEdges
+ FROM AllEnzymeEdges
GROUP BY pathway_id
, all_edges
HAVING COUNT (*) > 1
@@ -150,26 +139,20 @@
FROM pn
, AllEnzymeEdges aee
WHERE aee.all_edges = pn.all_edges
-
;
-
-
- CREATE UNLOGGED TABLE NodesWithParents AS
+ CREATE UNLOGGED TABLE :SCHEMA.NodesWithParents AS
SELECT DISTINCT ee.e_id AS pathway_node_id
, pn.parent
, ee.type AS node_type
, ee.pathway_id
- FROM EnzymeEdges ee
- , ParentNodes pn
+ FROM :SCHEMA.EnzymeEdges ee
+ , :SCHEMA.ParentNodes pn
WHERE pn.pathway_id = ee.pathway_id
AND ee.e_id = pn.e_id
-
;
-
-
- CREATE UNLOGGED TABLE EnzymeReactions AS
+ CREATE UNLOGGED TABLE :SCHEMA.EnzymeReactions AS
SELECT DISTINCT pn.PATHWAY_NODE_ID node_id
, pr.SOURCE_ID AS reaction_source_id
FROM sres.pathwaynode pn
@@ -181,28 +164,22 @@
AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
AND ot.name = 'enzyme'
- AND pn.PATHWAY_NODE_TYPE_ID = ot.ONTOLOGY_TERM_ID
-
+ AND pn.PATHWAY_NODE_TYPE_ID = ot.ONTOLOGY_TERM_ID
;
-
-
- CREATE UNLOGGED TABLE ParentsForEdges AS
+ CREATE UNLOGGED TABLE :SCHEMA.ParentsForEdges AS
SELECT ee.e_id
, ee.m1_id
, ee.ir1
, ee.m2_id
, ee.ir2
, np.parent
- FROM EnzymeEdges ee
- , NodesWithParents np
+ FROM :SCHEMA.EnzymeEdges ee
+ , :SCHEMA.NodesWithParents np
WHERE ee.e_id = np.pathway_node_id
-
;
-
-
- CREATE TABLE PathwayEdges AS
+ CREATE TABLE :SCHEMA.PathwayEdges AS
SELECT pa.source_id
, pa.pathway_source
, rel.*
@@ -211,16 +188,16 @@
, coalesce(pe.parent, ee.e_id::varchar) AS source
, ee.m1_id::varchar AS target
, max(ee.ir1) AS is_reversible
- FROM EnzymeEdges ee
- LEFT JOIN ParentsForEdges pe ON ee.e_id = pe.e_id
+ FROM :SCHEMA.EnzymeEdges ee
+ LEFT JOIN :SCHEMA.ParentsForEdges pe ON ee.e_id = pe.e_id
GROUP BY ee.pathway_id, ee.m1_id, coalesce(pe.parent, ee.e_id::varchar)
UNION
SELECT DISTINCT ee.pathway_id
, ee.m2_id::varchar AS source
, coalesce(pe.parent, ee.e_id::varchar) AS target
, max(ee.ir2) AS is_reversible
- FROM EnzymeEdges ee
- LEFT JOIN ParentsForEdges pe ON ee.e_id = pe.e_id
+ FROM :SCHEMA.EnzymeEdges ee
+ LEFT JOIN :SCHEMA.ParentsForEdges pe ON ee.e_id = pe.e_id
GROUP BY ee.pathway_id, ee.m2_id, coalesce(pe.parent, ee.e_id::varchar)
UNION
SELECT pn1.pathway_id
@@ -241,12 +218,9 @@
) rel
, PathwayAttributes pa
WHERE pa.pathway_id = rel.pathway_id
-
;
-
-
-
- CREATE TABLE PathwayNodes AS
+
+ CREATE TABLE :SCHEMA.PathwayNodes AS
SELECT pa.source_id
, pa.pathway_source
, pn.display_label
@@ -280,27 +254,40 @@
, type AS node_type
, ee.pathway_id
, er.reaction_source_id
- FROM EnzymeEdges ee
- INNER JOIN EnzymeReactions er ON er.node_id = ee.e_id
- LEFT JOIN NodesWithParents nwp ON ee.e_id = nwp.pathway_node_id
+ FROM :SCHEMA.EnzymeEdges ee
+ INNER JOIN :SCHEMA.EnzymeReactions er ON er.node_id = ee.e_id
+ LEFT JOIN :SCHEMA.NodesWithParents nwp ON ee.e_id = nwp.pathway_node_id
UNION
SELECT nwp.parent
, NULL
, 'nodeOfNodes'
, pathway_id
, NULL
- FROM NodesWithParents nwp
+ FROM :SCHEMA.NodesWithParents nwp
UNION
SELECT nwt.pathway_node_id::varchar AS pathway_node_id
, NULL AS parent
, nwt.type
, nwt.pathway_id
, NULL
- FROM NodesWithTypes nwt
+ FROM :SCHEMA.NodesWithTypes nwt
WHERE nwt.type != 'enzyme'
) nodes_with_parents
- INNER JOIN PathwayAttributes pa ON nodes_with_parents.pathway_id = pa.pathway_id
- LEFT JOIN NodesWithTypes pn ON nodes_with_parents.pathway_node_id = pn.pathway_node_id::varchar
-
+ INNER JOIN :SCHEMA.PathwayAttributes pa ON nodes_with_parents.pathway_id = pa.pathway_id
+ LEFT JOIN :SCHEMA.NodesWithTypes pn ON nodes_with_parents.pathway_node_id = pn.pathway_node_id::varchar
;
+-- Drop the intermediate work tables; PathwayEdges and PathwayNodes are kept.
+drop table :SCHEMA.NodesWithTypes;
+drop table :SCHEMA.ReactionsWithReversibility;
+drop table :SCHEMA.EnzymeEdges;
+drop table :SCHEMA.ParentNodes;
+drop table :SCHEMA.NodesWithParents;
+drop table :SCHEMA.EnzymeReactions;
+drop table :SCHEMA.ParentsForEdges;
+
+
+
+
+
+
diff --git a/Model/lib/psql/webtables/MG/PathwayReactions.psql b/Model/lib/psql/webtables/MG/PathwayReactions.psql
index 29f1ecef7c..5bdde8419a 100644
--- a/Model/lib/psql/webtables/MG/PathwayReactions.psql
+++ b/Model/lib/psql/webtables/MG/PathwayReactions.psql
@@ -1,6 +1,4 @@
-
-
- CREATE TABLE PathwayReactions AS
+ CREATE TABLE :SCHEMA.PathwayReactions AS
SELECT o.*
, CASE WHEN o.expasy_url IS NOT NULL THEN '' || o.enzyme || '' ELSE o.enzyme END as expasy_html
FROM (
@@ -57,8 +55,8 @@
, SRES.PATHWAYNODE pn
, SRES.PATHWAYRELATIONSHIP prel
, SRES.ONTOLOGYTERM ot
- , PathwayCompounds pc
- LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id
+ , :SCHEMA.PathwayCompounds pc
+ LEFT JOIN :SCHEMA.CompoundAttributes ca ON pc.chebi_accession = ca.source_id
WHERE p.PATHWAY_ID = prr.PATHWAY_ID
AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
@@ -101,7 +99,7 @@
, SRES.EXTERNALDATABASERELEASE edr
, SRES.ONTOLOGYTERM ot
, rep
- , PathwayCompounds pc
+ , :SCHEMA.PathwayCompounds pc
LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id
WHERE p.PATHWAY_ID = prr.PATHWAY_ID
AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
@@ -128,14 +126,9 @@
) i
LEFT OUTER JOIN sres.enzymeclass ec ON i.enzyme = ec.ec_number
) o
-
- ;
-
-
-
- create index PathRcts_id_ix
- on PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name)
+ ;
-
- ;
+ create index PathRcts_id_ix
+ on :SCHEMA.PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name)
+ ;
From 50e942f579a4aa3fa4976b179cd6dcda58474e50 Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Fri, 9 May 2025 17:05:49 -0400
Subject: [PATCH 003/112] rm spclosure and move genmicseq attributes
---
.../webtables/??/SequencePieceClosure.psql | 48 -------------------
.../{?? => MO}/GenomicSeqAttributes.psql | 0
2 files changed, 48 deletions(-)
delete mode 100644 Model/lib/psql/webtables/??/SequencePieceClosure.psql
rename Model/lib/psql/webtables/{?? => MO}/GenomicSeqAttributes.psql (100%)
diff --git a/Model/lib/psql/webtables/??/SequencePieceClosure.psql b/Model/lib/psql/webtables/??/SequencePieceClosure.psql
deleted file mode 100644
index 3993db26d3..0000000000
--- a/Model/lib/psql/webtables/??/SequencePieceClosure.psql
+++ /dev/null
@@ -1,48 +0,0 @@
-
-
- CREATE TABLE :ORG_ABBREVSequencePieceClosure AS
- SELECT sp.sequence_piece_id,
- sp.virtual_na_sequence_id,
- sp.piece_na_sequence_id,
- sp.sequence_order,
- sp.distance_from_left,
- sp.uncertainty,
- sp.strand_orientation,
- sp.start_position,
- sp.end_position,
- sp.modification_date,
- 1 AS edge_level
- FROM dots.SequencePiece sp, dots.NaSequence ns
- WHERE sp.piece_na_sequence_id = ns.na_sequence_id
- AND (ns.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
-
- ;
-
-
-
- /* known issue: this should be run not just once, but iteratively
- until it doesn't create new records. Currently (7/2008),
- SequencePieces aren't nested even once. */
- INSERT INTO :ORG_ABBREVSequencePieceClosure
- (edge_level, virtual_na_sequence_id, piece_na_sequence_id,
- distance_from_left, strand_orientation, modification_date,
- start_position, end_position, sequence_order, sequence_piece_id)
- SELECT 2, higher.virtual_na_sequence_id, lower.piece_na_sequence_id,
- higher.distance_from_left,
- case
- when coalesce(higher.strand_orientation, '+') = coalesce(lower.strand_orientation, '+')
- then '+'
- else '-'
- end as strand_orientation,
- now(),
- higher.start_position - lower.distance_from_left,
- higher.end_position - lower.distance_from_left,
- higher.sequence_order,
- nextval('dots.sequencepiece_sq')
- FROM :ORG_ABBREVSequencePieceClosure higher, :ORG_ABBREVSequencePieceClosure lower
- WHERE higher.piece_na_sequence_id = lower.virtual_na_sequence_id
- AND higher.start_position >= lower.start_position + lower.distance_from_left
- AND higher.end_position <= lower.end_position + lower.distance_from_left
-
- ;
-
diff --git a/Model/lib/psql/webtables/??/GenomicSeqAttributes.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/GenomicSeqAttributes.psql
rename to Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql
From 758358e8306958c7758adc4c0b0f49df47625c2d Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Fri, 9 May 2025 17:07:37 -0400
Subject: [PATCH 004/112] organism tables... and some protein tables
---
.../webtables/MO/GenomicSeqAttributes.psql | 49 ++------
.../webtables/MO/GenomicSeqAttributes_ix.psql | 23 ++++
.../psql/webtables/MO/GenomicSequenceId.psql | 5 +-
.../webtables/MO/GenomicSequenceId_ix.psql | 6 +-
.../webtables/MO/GenomicSequenceSequence.psql | 13 +-
.../MO/GenomicSequenceSequence_ix.psql | 2 +-
.../webtables/MO/OrganismAbbreviation.psql | 10 +-
.../MO/OrganismAbbreviationBlast.psql | 3 -
.../webtables/MO/OrganismSelectTaxonRank.psql | 5 +-
.../lib/psql/webtables/MO/PdbSimilarity.psql | 11 +-
.../webtables/MO/SequencePieceClosure.psql | 48 ++++++++
.../webtables/MO/SignalPeptideDomains.psql | 9 +-
.../webtables/MO/SignalPeptideDomains_ix.psql | 8 +-
Model/lib/psql/webtables/MO/TaxonSpecies.psql | 21 +---
Model/lib/psql/webtables/MO/Taxonomy.psql | 8 +-
Model/lib/psql/webtables/MO/Taxonomy_ix.psql | 4 +-
.../webtables/MO/TransmembraneDomains.psql | 12 +-
.../webtables/MO/TransmembraneDomains_ix.psql | 4 +-
Model/lib/xml/tuningManager/tablePruning.txt | 4 +-
Model/lib/xml/tuningManager/webtables.org | 114 ++++++++++++++++++
20 files changed, 240 insertions(+), 119 deletions(-)
create mode 100644 Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
create mode 100644 Model/lib/psql/webtables/MO/SequencePieceClosure.psql
create mode 100644 Model/lib/xml/tuningManager/webtables.org
diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql
index 6922acea92..6b964c4328 100644
--- a/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql
@@ -1,8 +1,8 @@
-
-
- CREATE TABLE :ORG_ABBREVGenomicSeqAttributes AS
+:CREATE_AND_POPULATE
SELECT
- cast(apidb.prefixed_project_id(tn.name, ':ORG_ABBREV') as varchar(20)) as project_id,
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
SUBSTR(sequence.source_id, 1, 60) AS source_id, sequence.a_count,
sequence.c_count, sequence.g_count, sequence.t_count,
(sequence.length
@@ -22,18 +22,15 @@
SUBSTR(sequence.chromosome, 1, 20) AS chromosome,
sequence.external_database_release_id, sequence.sequence_ontology_id,
sequence.chromosome_order_num, so.source_id as so_id, so.name as sequence_type,
- coalesce(virtualization.is_top_level, 1) as is_top_level,
+ 1 as is_top_level,
sequence.na_sequence_id, organism.genome_source,
organism.name_for_filenames, coalesce(msa.has_msa, 0) as has_msa
- FROM sres.Taxon LEFT JOIN apidb.Organism ON taxon.taxon_id = organism.taxon_id,
+ FROM sres.Taxon LEFT JOIN apidb.Organism ON organism.taxon_id = :TAXON_ID and taxon.taxon_id = organism.taxon_id,
sres.OntologyTerm so,
( SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description,
a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id
FROM dots.ExternalNaSequence
- UNION
- SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description,
- a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id
- FROM dots.VirtualSequence
+ WHERE taxon_id = :TAXON_ID
) sequence
LEFT JOIN
(SELECT drns.na_sequence_id, max(dr.primary_identifier) AS genbank_accession
@@ -53,10 +50,6 @@
WHERE edr.external_database_id = ed.external_database_id
) db ON sequence.external_database_release_id = db.external_database_release_id
LEFT JOIN
- (SELECT distinct piece_na_sequence_id, 0 as is_top_level
- FROM :ORG_ABBREVSequencePieceClosure
- ) virtualization ON sequence.na_sequence_id = virtualization.piece_na_sequence_id
- LEFT JOIN
(SELECT a_na_sequence_id as na_sequence_id, 1 as has_msa
FROM apidb.Synteny syn
GROUP BY a_na_sequence_id
@@ -73,30 +66,6 @@
AND (sequence.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
AND so.name IN ('random_sequence', 'chromosome', 'contig', 'supercontig','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle')
ORDER BY organism, source_id
-
- ;
-
-
-
- create unique index pk_SeqAttr_ ON :ORG_ABBREVGenomicSeqAttributes (lower(source_id), project_id)
-
- ;
-
-
-
- create unique index SeqAttr_source_id ON :ORG_ABBREVGenomicSeqAttributes (source_id)
-
- ;
-
-
-
- create unique index SeqAttr_naseqid ON :ORG_ABBREVGenomicSeqAttributes (na_sequence_id)
-
- ;
-
-
-
- create unique index SeqAttr_taxsrc_id ON :ORG_ABBREVGenomicSeqAttributes (taxon_id, source_id)
-
- ;
+
+:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
new file mode 100644
index 0000000000..7465dc4dbb
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
@@ -0,0 +1,23 @@
+
+
+ -- Index names are unqualified: PostgreSQL creates an index in its table's schema.
+ create unique index :ORG_ABBREVpk_SeqAttr_ ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (lower(source_id), project_id)
+ ;
+
+
+
+ create unique index :ORG_ABBREVSeqAttr_source_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (source_id)
+
+ ;
+
+
+
+ create unique index :ORG_ABBREVSeqAttr_naseqid ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (na_sequence_id)
+
+ ;
+
+
+
+ create unique index :ORG_ABBREVSeqAttr_taxsrc_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (taxon_id, source_id)
+
+ ;
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
index 3b5c7cbe91..7cf8dadc40 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
@@ -1,12 +1,10 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE GenomicSequenceId AS
SELECT DISTINCT substr(id, 1, 60) as id, substr(sequence, 1, 60) AS sequence
FROM (
SELECT ns.source_id as id, ns.source_id as sequence
FROM dots.NaSequence ns, sres.OntologyTerm oterm
WHERE ns.sequence_ontology_id = oterm.ontology_term_id
+ AND ns.taxon_id = :TAXON_ID
AND oterm.name in ('random_sequence', 'contig', 'supercontig', 'chromosome','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle','kinetoplast')
UNION
SELECT dr.primary_identifier AS id, ns.source_id AS sequence
@@ -14,6 +12,7 @@
sres.DbRef dr, sres.ExternalDatabaseRelease edr,
sres.ExternalDatabase ed
WHERE dr.primary_identifier IS NOT NULL
+ AND ns.taxon_id = :TAXON_ID
AND ns.na_sequence_id = drnf.na_sequence_id
AND drnf.db_ref_id = dr.db_ref_id
AND dr.external_database_release_id
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
index 90bc21c3d6..a1fa450bcc 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
@@ -1,20 +1,20 @@
- CREATE INDEX GenSeqId_sequence_idx ON GenomicSequenceId (sequence, id)
+ CREATE INDEX GenSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id)
;
- CREATE INDEX GenSeqId_id_idx ON GenomicSequenceId (id, sequence)
+ CREATE INDEX GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence)
;
- CREATE INDEX GenSeqId_lowid_idx ON GenomicSequenceId (lower(id), sequence)
+ CREATE INDEX GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence)
;
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
index be8a73a415..86d8919c6b 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
@@ -1,13 +1,12 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE GenomicSequenceSequence AS
- SELECT sa.source_id, cast(apidb.project_id(tn.name) as varchar(20)) as project_id,
+ SELECT ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ sa.source_id,
ns.sequence
- FROM GenomicSeqAttributes sa, dots.NaSequence ns, sres.TaxonName tn
+ FROM :SCHEMA.GenomicSeqAttributes sa, dots.NaSequence ns
WHERE sa.na_sequence_id = ns.na_sequence_id
- AND ns.taxon_id = tn.taxon_id
- AND tn.name_class = 'scientific name'
+ and sa.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
index d68fd68292..7d28f93796 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
@@ -1,6 +1,6 @@
- create index GenomicSeq_ix on GenomicSequenceSequence (source_id, project_id)
+ create index :ORG_ABBREVGenomicSeq_ix on :SCHEMA.:ORG_ABBREVGenomicSequenceSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
index 56714b16e6..97944361b3 100644
--- a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
@@ -1,15 +1,9 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE OrganismAbbreviation AS
- SELECT organism, abbreviation
- FROM (
- select tn.name as organism, o.name_for_filenames as abbreviation
+ select tn.name as organism, o.name_for_filenames
from apidb.Organism o, sres.TaxonName tn
where o.taxon_id = tn.taxon_id
and tn.name_class = 'scientific name'
- ) subquery1
-
+ and o.taxon_id = :TAXON_ID
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
index f04888b75c..f098098f71 100644
--- a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
@@ -1,7 +1,4 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE OrganismAbbreviationBlast as
SELECT organism, parent, abbreviation, substr(project_id, 1, 20) as project_id
FROM OrganismAbbreviationWS
UNION
diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
index 326c177a8a..75792a7940 100644
--- a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
+++ b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
@@ -1,7 +1,4 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE OrganismSelectTaxonRank AS
WITH organism_rank AS (
SELECT tn1.name as organism, o.public_abbrev, tn2.name as parent_organism,
case when tn2.name = 'Oomycetes' then 'class' else r.rank end as rank
@@ -9,7 +6,7 @@
WITH RECURSIVE cte AS(
SELECT taxon_id input, taxon_id, rank, parent_id
FROM sres.taxon
- WHERE taxon_id IN (SELECT taxon_id FROM apidb.organism WHERE is_annotated_genome = 1)
+ WHERE taxon_id IN (SELECT taxon_id FROM apidb.organism WHERE taxon_id = :TAXON_ID and is_annotated_genome = 1)
UNION
SELECT cte.input, t.taxon_id, t.rank, t.parent_id
FROM sres.taxon t, cte
diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity.psql b/Model/lib/psql/webtables/MO/PdbSimilarity.psql
index 2818d01271..7a2644ce2b 100644
--- a/Model/lib/psql/webtables/MO/PdbSimilarity.psql
+++ b/Model/lib/psql/webtables/MO/PdbSimilarity.psql
@@ -1,8 +1,8 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE PdbSimilarity AS
- SELECT ta.source_id, eas.source_id AS pdb_chain,
+ SELECT ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ ta.source_id, eas.source_id AS pdb_chain,
substr(eas.description, 1, 100) AS pdb_title,
substr(eas.source_id
, 1
@@ -20,11 +20,12 @@
FROM apidb.PdbSimilarity s,
apiDB.ProteinDataBank eas,
sres.TaxonName tn,
- TranscriptAttributes ta
+ :SCHEMA.TranscriptAttributes ta
WHERE ta.aa_sequence_id = s.aa_sequence_id
AND s.pident = eas.protein_data_bank_id
and tn.name_class = 'scientific name'
AND eas.taxon_id = tn.taxon_id
+ and ta.org_abbrev = ':ORG_ABBREV'
ORDER BY ta.source_id, eas.source_id
diff --git a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql
new file mode 100644
index 0000000000..3993db26d3
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql
@@ -0,0 +1,48 @@
+
+ /* level-1 edges: SequencePiece rows whose piece sequence has taxon :TAXON_ID */
+ CREATE TABLE :ORG_ABBREVSequencePieceClosure AS
+ SELECT sp.sequence_piece_id,
+ sp.virtual_na_sequence_id,
+ sp.piece_na_sequence_id,
+ sp.sequence_order,
+ sp.distance_from_left,
+ sp.uncertainty,
+ sp.strand_orientation,
+ sp.start_position,
+ sp.end_position,
+ sp.modification_date,
+ 1 AS edge_level
+ FROM dots.SequencePiece sp, dots.NaSequence ns
+ WHERE sp.piece_na_sequence_id = ns.na_sequence_id
+ AND ns.taxon_id = :TAXON_ID
+
+ ;
+
+
+
+ /* known issue: this should be run not just once, but iteratively
+ until it doesn't create new records. Currently (7/2008),
+ SequencePieces aren't nested even once. */
+ INSERT INTO :ORG_ABBREVSequencePieceClosure
+ (edge_level, virtual_na_sequence_id, piece_na_sequence_id,
+ distance_from_left, strand_orientation, modification_date,
+ start_position, end_position, sequence_order, sequence_piece_id)
+ SELECT 2, higher.virtual_na_sequence_id, lower.piece_na_sequence_id,
+ higher.distance_from_left,
+ case
+ when coalesce(higher.strand_orientation, '+') = coalesce(lower.strand_orientation, '+')
+ then '+'
+ else '-'
+ end as strand_orientation,
+ now(),
+ higher.start_position - lower.distance_from_left,
+ higher.end_position - lower.distance_from_left,
+ higher.sequence_order,
+ nextval('dots.sequencepiece_sq')
+ FROM :ORG_ABBREVSequencePieceClosure higher, :ORG_ABBREVSequencePieceClosure lower
+ WHERE higher.piece_na_sequence_id = lower.virtual_na_sequence_id
+ AND higher.start_position >= lower.start_position + lower.distance_from_left
+ AND higher.end_position <= lower.end_position + lower.distance_from_left
+
+ ;
+
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql
index 80e45141ca..2ce374b972 100644
--- a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql
+++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql
@@ -1,9 +1,9 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE :ORG_ABBREVSignalPeptideDomains AS
SELECT
- gf.source_id gene_source_id
+ ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ , gf.source_id gene_source_id
, t.source_id transcript_source_id
, taf.na_feature_id
, spf.aa_feature_id
@@ -28,6 +28,7 @@
AND taf.aa_sequence_id = tas.aa_sequence_id
AND tas.aa_sequence_id = spf.aa_sequence_id
AND gf.na_feature_id = t.parent_id
+ AND tas.taxon_id = :TAXON_ID
AND (spf.signal_probability >= .5
OR spf.signal_probability IS NULL
OR ((spf.means_score + spf.maxy_score) / 2) >= .5
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
index b305c86713..d5b2c93f6c 100644
--- a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
+++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
@@ -1,15 +1,15 @@
- CREATE INDEX SignalP1_ix
- ON :ORG_ABBREVSignalPeptideDomains (aa_sequence_id)
+ CREATE INDEX :ORG_ABBREVSignalP1_ix
+ ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (aa_sequence_id)
;
- CREATE INDEX SignalP2_ix
- ON :ORG_ABBREVSignalPeptideDomains (gene_source_id, transcript_source_id, end_max)
+ CREATE INDEX :ORG_ABBREVSignalP2_ix
+ ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (gene_source_id, transcript_source_id, end_max)
;
diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies.psql b/Model/lib/psql/webtables/MO/TaxonSpecies.psql
index 7ef89cd520..79a6023d27 100644
--- a/Model/lib/psql/webtables/MO/TaxonSpecies.psql
+++ b/Model/lib/psql/webtables/MO/TaxonSpecies.psql
@@ -1,25 +1,10 @@
-
-
- CREATE UNLOGGED TABLE :ORG_ABBREVTaxonOfInterest AS
- SELECT taxon_id
- FROM :ORG_ABBREVGenomicSeqAttributes
- UNION
- SELECT ns.taxon_id
- FROM dots.NaSequence ns, dots.Est
- WHERE est.na_sequence_id = ns.na_sequence_id
-
- ;
-
+-- recursively walk taxon tree to find ancestor with rank "species"
+-- Update this to select max/min level with rank species if there are multiple
:CREATE_AND_POPULATE
-
-
- CREATE TABLE :ORG_ABBREVTaxonSpecies as
- -- recursively walk taxon tree to find ancestor with rank "species"
- -- Update this to select max/min level with rank species if there are multiple
WITH RECURSIVE cte AS (
SELECT TAXON_ID, taxon_id as parent_id, 1 as lvl
FROM sres.taxon
- WHERE taxon_id IN (SELECT taxon_id FROM :ORG_ABBREVTaxonofinterest)
+ WHERE taxon_id = :TAXON_ID
UNION ALL
SELECT cte.taxon_id, sub.parent_id, lvl + 1
FROM cte, sres.taxon sub
diff --git a/Model/lib/psql/webtables/MO/Taxonomy.psql b/Model/lib/psql/webtables/MO/Taxonomy.psql
index 249eda7162..cbc2cbc0fa 100644
--- a/Model/lib/psql/webtables/MO/Taxonomy.psql
+++ b/Model/lib/psql/webtables/MO/Taxonomy.psql
@@ -1,7 +1,4 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE Taxonomy as
WITH RECURSIVE cte AS (
WITH tax AS(
SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id,
@@ -13,7 +10,7 @@
)
SELECT tax.*, name as organism, ARRAY[taxon_id::numeric] as path
FROM tax
- WHERE name IN (SELECT DISTINCT organism FROM GeneAttributes)
+ WHERE taxon_id = :TAXON_ID
UNION
SELECT tax.*, cte.organism, cte.path || tax.taxon_id as path
FROM tax, cte
@@ -22,7 +19,4 @@
)
SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over() as orderNum
FROM (SELECT cte.* FROM cte ORDER BY path) t
-
-
:DECLARE_PARTITION;
-
diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
index 84083eda42..b5e2d3fb41 100644
--- a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
+++ b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
@@ -1,7 +1,7 @@
- create index tax_ix
- on Taxonomy
+ create index tax_ix
+ on :SCHEMA.Taxonomy
(organism, ordernum, taxon_id, parent_id, ncbi_tax_id, name, rank)
diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql
index 7f76786eb0..99406037ee 100644
--- a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql
+++ b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql
@@ -1,8 +1,7 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE TransmembraneDomains AS
- SELECT ta.source_id as transcript_source_id
+ SELECT ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ , ta.source_id as transcript_source_id
, ta.gene_source_id AS gene_source_id
- , ta.project_id
, tmf.topology AS tmf_topology
@@ -13,7 +12,7 @@
, tmf.aa_sequence_id tmf_aa_sequence_id
, tas.source_id as protein_source_id
FROM dots.aalocation aal
- , transcriptattributes ta
+ , :SCHEMA.transcriptattributes ta
, dots.translatedaafeature taf
, dots.translatedaasequence tas
, dots.transmembraneaafeature tmf
@@ -21,7 +20,7 @@
AND taf.aa_sequence_id = tas.aa_sequence_id
AND tas.aa_sequence_id = tmf.aa_sequence_id
and tmf.aa_feature_id = aal.aa_feature_id
-
+ and ta.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
index bdefeef42d..f6aea03de7 100644
--- a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
+++ b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
@@ -1,7 +1,7 @@
- create index TransDom1_ix
- on TransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology)
+ create index :ORG_ABBREVTransDom1_ix
+ on :SCHEMA.:ORG_ABBREVTransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology)
;
diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt
index c966901176..6f0b461f33 100644
--- a/Model/lib/xml/tuningManager/tablePruning.txt
+++ b/Model/lib/xml/tuningManager/tablePruning.txt
@@ -8,7 +8,7 @@ MG
??
??
??
-??
+R
??
??
??
@@ -82,7 +82,7 @@ K
R
R
K
-MO
+K
K
MG (tuning from non gus tables)
MG (tuning from non gus tables)
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
new file mode 100644
index 0000000000..1265054f2e
--- /dev/null
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -0,0 +1,114 @@
+#+title: Webtables
+
+
+* MO Tables
+- Organism
+ - [X] OrganismAbbreviationBlast_ix.psql
+ - [X] OrganismAbbreviationBlast.psql
+ - move to KEEP
+ - [X] OrganismAbbreviation_ix.psql
+ - [X] OrganismAbbreviation.psql
+ - the abbreviation output column is now named name_for_filenames (alias dropped)
+ - [X] OrganismSelectTaxonRank_ix.psql
+ - [X] OrganismSelectTaxonRank.psql
+ - [X] Taxonomy_ix.psql
+ - [X] Taxonomy.psql
+ - [X] TaxonSpecies_ix.psql
+ - [X] TaxonSpecies.psql
+- Genomic Sequence
+ - [X] GenomicSequenceId_ix.psql
+ - [X] GenomicSequenceId.psql
+ - [X] GenomicSequenceSequence_ix.psql
+ - [X] GenomicSequenceSequence.psql
+ - [X] SequencePieceClosure.psql
+ - [X] GenomicSeqAttributes.psql
+
+- Transcript / Protein
+ - [X] SignalPeptideDomains_ix.psql
+ - [X] SignalPeptideDomains.psql
+ - [X] TransmembraneDomains_ix.psql
+ - [X] TransmembraneDomains.psql
+ - [X] PdbSimilarity_ix.psql
+ - [X] PdbSimilarity.psql
+ - [ ] ProteinSequence_ix.psql
+ - [ ] ProteinSequence.psql
+ - [ ] ProteinAttributes_ix.psql
+ - [ ] ProteinAttributes.psql
+ - [ ] TranscriptAttributes_ix.psql
+ - [ ] TranscriptAttributes.psql
+ - [ ] CodingSequence_ix.psql
+ - [ ] CodingSequence.psql
+ - [ ] IntronUtrCoords_ix.psql
+ - [ ] IntronUtrCoords.psql
+ - [ ] TranscriptCenDistance_ix.psql
+ - [ ] TranscriptCenDistance.psql
+ - [ ] TranscriptPathway_ix.psql
+ - [ ] TranscriptPathway.psql
+ - [ ] TranscriptSequence_ix.psql
+ - [ ] TranscriptSequence.psql
+ - [ ] ChIPchipTranscript_ix.psql
+ - [ ] ChIPchipTranscript.psql
+
+- Gene
+ - [ ] GeneId_ix.psql
+ - [ ] GeneId.psql
+ - [ ] GeneAttributes_ix.psql
+ - [ ] GeneAttributes.psql
+ - [ ] GeneCopyNumbers_ix.psql
+ - [ ] GeneCopyNumbers.psql
+ - [ ] GeneGoTable_ix.psql
+ - [ ] GeneGoTable.psql
+ - [ ] GeneGoTerms_ix.psql
+ - [ ] GeneGoTerms.psql
+ - [ ] GeneLocations_ix.psql
+ - [ ] GeneLocations.psql
+ - [ ] GeneModelDump_ix.psql
+ - [ ] GeneModelDump.psql
+ - [ ] GeneSummaryFilter_ix.psql
+ - [ ] GeneSummaryFilter.psql
+ - [ ] TFBSGene_ix.psql
+ - [ ] TFBSGene.psql
+ - [ ] PathwayNodeGene_ix.psql
+ - [ ] PathwayNodeGene.psql
+ - [ ] PathwaysGeneTable_ix.psql
+ - [ ] PathwaysGeneTable.psql
+ - [ ] GoTermSummary_ix.psql
+ - [ ] GoTermSummary.psql
+ - [ ] EqtlSpan_ix.psql
+ - [ ] EqtlSpan.psql
+
+- EST
+ - [ ] EstAlignmentGeneSummary_ix.psql
+ - [ ] EstAlignmentGeneSummary.psql
+ - [ ] EstAttributes_ix.psql
+ - [ ] EstAttributes.psql
+ - [ ] EstSequence_ix.psql
+ - [ ] EstSequence.psql
+
+- Dataset / Other
+ - [ ] DatasetExampleSourceId_ix.psql
+ - [ ] DatasetExampleSourceId.psql
+ - [ ] Profile_ix.psql
+ - [ ] Profile.psql
+ - [ ] ProfileSamples_ix.psql
+ - [ ] ProfileSamples.psql
+ - [ ] ProfileType_ix.psql
+ - [ ] ProfileType.psql
+ - [ ] RnaSeqStats_ix.psql
+ - [ ] RnaSeqStats.psql
+ - [ ] OrganismAttributes_ix.psql
+ - [ ] OrganismAttributes.psql
+ - [ ] ChrCopyNumbers_ix.psql
+ - [ ] ChrCopyNumbers.psql
+
+- Junctions (Kathryn)
+ - [ ] IntronSupportLevel_ix.psql
+ - [ ] IntronSupportLevel.psql
+ - [ ] GeneIntJuncStats_ix.psql
+ - [ ] GeneIntJuncStats.psql
+ - [ ] GeneIntronJunction_ix.psql
+ - [ ] GeneIntronJunction.psql
+ - [ ] NameMappingGIJ_ix.psql
+ - [ ] NameMappingGIJ.psql
+ - [ ] GeneMaxIntronGIJ_ix.psql
+ - [ ] GeneMaxIntronGIJ.psql
From 9a3a48f32e041da4d7bd1f2fb40f902364f11da8 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 9 May 2025 21:18:48 -0400
Subject: [PATCH 005/112] break out indexes into _ix files
---
Model/lib/psql/webtables/MG/CompoundAttributes.psql | 2 --
Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql | 2 ++
Model/lib/psql/webtables/MG/CompoundId.psql | 2 --
Model/lib/psql/webtables/MG/CompoundId_ix.psql | 2 ++
Model/lib/psql/webtables/MG/GroupDomainAttribute.psql | 2 --
Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql | 2 ++
Model/lib/psql/webtables/MG/OntologyLevels.psql | 2 --
Model/lib/psql/webtables/MG/OntologyLevels_ix.psql | 2 ++
Model/lib/psql/webtables/MG/PathwayAttributes.psql | 7 -------
Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql | 7 +++++++
Model/lib/psql/webtables/MG/PathwayCompounds.psql | 3 ---
Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql | 3 +++
Model/lib/psql/webtables/MG/PathwayReactions.psql | 3 ---
Model/lib/psql/webtables/MG/PathwayReactions_ix.psql | 3 +++
14 files changed, 21 insertions(+), 21 deletions(-)
create mode 100644 Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql
create mode 100644 Model/lib/psql/webtables/MG/CompoundId_ix.psql
create mode 100644 Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql
create mode 100644 Model/lib/psql/webtables/MG/OntologyLevels_ix.psql
create mode 100644 Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql
create mode 100644 Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql
create mode 100644 Model/lib/psql/webtables/MG/PathwayReactions_ix.psql
diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes.psql b/Model/lib/psql/webtables/MG/CompoundAttributes.psql
index ea8207e077..22f54648e9 100644
--- a/Model/lib/psql/webtables/MG/CompoundAttributes.psql
+++ b/Model/lib/psql/webtables/MG/CompoundAttributes.psql
@@ -16,6 +16,4 @@
GROUP BY p.ID, p.source_id, p.compound_name, p.definition, p.secondary_ids
;
- CREATE INDEX :SCHEMA.CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id)
- ;
diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql b/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql
new file mode 100644
index 0000000000..a16c042e5b
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql
@@ -0,0 +1,2 @@
+ CREATE INDEX CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id)
+ ;
diff --git a/Model/lib/psql/webtables/MG/CompoundId.psql b/Model/lib/psql/webtables/MG/CompoundId.psql
index e235b32106..ca4a9c9c60 100644
--- a/Model/lib/psql/webtables/MG/CompoundId.psql
+++ b/Model/lib/psql/webtables/MG/CompoundId.psql
@@ -27,6 +27,4 @@
AND n.type = 'SYNONYM'
;
- CREATE INDEX :SCHEMA.CompoundId_idx ON :SCHEMA.CompoundId (id, compound)
- ;
diff --git a/Model/lib/psql/webtables/MG/CompoundId_ix.psql b/Model/lib/psql/webtables/MG/CompoundId_ix.psql
new file mode 100644
index 0000000000..217b020bac
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/CompoundId_ix.psql
@@ -0,0 +1,2 @@
+ CREATE INDEX CompoundId_idx ON :SCHEMA.CompoundId (id, compound)
+ ;
diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
index b023dc3669..e9f869535b 100644
--- a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
+++ b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
@@ -21,6 +21,4 @@ WHERE og.group_id = ag.group_name
)
;
-CREATE INDEX SCHEMA.GroupDomainAttribute_idx ON SCHEMA.GroupDomainAttribute (group_name)
- ;
diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql b/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql
new file mode 100644
index 0000000000..4112a31ce3
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql
@@ -0,0 +1,2 @@
+CREATE INDEX GroupDomainAttribute_idx ON :SCHEMA.GroupDomainAttribute (group_name)
+ ;
diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webtables/MG/OntologyLevels.psql
index 417c6657aa..243ab0c1f1 100644
--- a/Model/lib/psql/webtables/MG/OntologyLevels.psql
+++ b/Model/lib/psql/webtables/MG/OntologyLevels.psql
@@ -30,6 +30,4 @@
GROUP BY ontology_term_id
;
- create index olev_termix on :SCHEMA.OntologyLevels (ontology_term_id, min_depth, max_depth)
- ;
diff --git a/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql b/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql
new file mode 100644
index 0000000000..708dc47e5a
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql
@@ -0,0 +1,2 @@
+ create index olev_termix on :SCHEMA.OntologyLevels (ontology_term_id, min_depth, max_depth)
+ ;
diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes.psql b/Model/lib/psql/webtables/MG/PathwayAttributes.psql
index 12720d8b73..e976027524 100644
--- a/Model/lib/psql/webtables/MG/PathwayAttributes.psql
+++ b/Model/lib/psql/webtables/MG/PathwayAttributes.psql
@@ -47,11 +47,4 @@
AND ed.name NOT LIKE '%MPMP%'
;
- CREATE UNIQUE INDEX :SCHEMA.PathAttr_sourceId_pwaySrc
- ON :SCHEMA.PathwayAttributes (source_id, pathway_source)
- ;
-
- create index :SCHEMA.PathAttr_ix
- on :SCHEMA.PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count)
- ;
diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql b/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql
new file mode 100644
index 0000000000..99f50c5a08
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql
@@ -0,0 +1,7 @@
+ CREATE UNIQUE INDEX PathAttr_sourceId_pwaySrc
+ ON :SCHEMA.PathwayAttributes (source_id, pathway_source)
+ ;
+
+ create index PathAttr_ix
+ on :SCHEMA.PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count)
+ ;
diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds.psql b/Model/lib/psql/webtables/MG/PathwayCompounds.psql
index a30b299c31..68fcc82904 100644
--- a/Model/lib/psql/webtables/MG/PathwayCompounds.psql
+++ b/Model/lib/psql/webtables/MG/PathwayCompounds.psql
@@ -78,7 +78,4 @@
) t2 LEFT OUTER JOIN CHEBI.COMPOUNDS c on t2.row_id = c.ID
;
- create index :SCHEMA.PthCmpd_id_ix
- on :SCHEMA.PathwayCompounds (pathway_id, reaction_id, ext_db_name)
- ;
diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql b/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql
new file mode 100644
index 0000000000..fbcdfa72e1
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql
@@ -0,0 +1,3 @@
+ create index PthCmpd_id_ix
+ on :SCHEMA.PathwayCompounds (pathway_id, reaction_id, ext_db_name)
+ ;
diff --git a/Model/lib/psql/webtables/MG/PathwayReactions.psql b/Model/lib/psql/webtables/MG/PathwayReactions.psql
index 5bdde8419a..5787e17267 100644
--- a/Model/lib/psql/webtables/MG/PathwayReactions.psql
+++ b/Model/lib/psql/webtables/MG/PathwayReactions.psql
@@ -128,7 +128,4 @@
) o
;
- create index :SCHEMA.PathRcts_id_ix
- on :SCHEMA.PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name)
- ;
diff --git a/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql b/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql
new file mode 100644
index 0000000000..e0ed73978b
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql
@@ -0,0 +1,3 @@
+ create index PathRcts_id_ix
+ on :SCHEMA.PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name)
+ ;
From fcfb72559b1bd64285c25dbbbaa23d422a36f5b9 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Mon, 12 May 2025 12:16:09 -0400
Subject: [PATCH 006/112] add macros and fix index files
---
.../lib/psql/webtables/MG/OntologyLevels.psql | 3 +-
.../webtables/MO/ChIPchipTranscript_ix.psql | 6 +-
.../psql/webtables/MO/ChrCopyNumbers_ix.psql | 15 +--
.../psql/webtables/MO/CodingSequence_ix.psql | 6 +-
Model/lib/psql/webtables/MO/EqtlSpan_ix.psql | 8 +-
.../webtables/MO/EstAlignmentGeneSummary.psql | 2 +
.../lib/psql/webtables/MO/EstSequence_ix.psql | 4 -
.../psql/webtables/MO/GeneAttributes_ix.psql | 85 +++----------
.../psql/webtables/MO/GeneCopyNumbers_ix.psql | 8 +-
.../lib/psql/webtables/MO/GeneGoTable_ix.psql | 6 +-
.../lib/psql/webtables/MO/GeneGoTerms_ix.psql | 6 +-
Model/lib/psql/webtables/MO/GeneId_ix.psql | 30 +----
.../webtables/MO/GeneIntJuncStats_ix.psql | 5 +-
.../webtables/MO/GeneIntronJunction_ix.psql | 18 +--
.../psql/webtables/MO/GeneLocations_ix.psql | 8 +-
.../webtables/MO/GeneMaxIntronGIJ_ix.psql | 5 +-
.../psql/webtables/MO/GeneModelDump_ix.psql | 8 +-
.../webtables/MO/GenomicSeqAttributes_ix.psql | 12 --
.../psql/webtables/MO/GenomicSequenceId.psql | 5 +-
.../webtables/MO/GenomicSequenceId_ix.psql | 18 +--
.../MO/GenomicSequenceSequence_ix.psql | 4 -
.../psql/webtables/MO/GoTermSummary_ix.psql | 12 +-
.../psql/webtables/MO/IntronUtrCoords_ix.psql | 16 +--
.../psql/webtables/MO/NameMappingGIJ_ix.psql | 5 +-
.../webtables/MO/OrganismAbbreviation.psql | 5 +-
.../MO/OrganismAbbreviationBlast.psql | 11 +-
.../psql/webtables/MO/OrganismAttributes.psql | 35 +++--
.../webtables/MO/OrganismAttributes_ix.psql | 6 +-
.../webtables/MO/OrganismSelectTaxonRank.psql | 5 +-
.../webtables/MO/PathwaysGeneTable_ix.psql | 5 +-
.../psql/webtables/MO/ProfileSamples_ix.psql | 16 +--
Model/lib/psql/webtables/MO/Profile_ix.psql | 24 +---
.../psql/webtables/MO/ProteinAttributes.psql | 12 +-
.../webtables/MO/ProteinAttributes_ix.psql | 12 +-
.../psql/webtables/MO/ProteinSequence_ix.psql | 6 +-
.../webtables/MO/SequencePieceClosure.psql | 2 -
.../webtables/MO/SignalPeptideDomains_ix.psql | 8 --
Model/lib/psql/webtables/MO/TFBSGene_ix.psql | 12 +-
Model/lib/psql/webtables/MO/TaxonSpecies.psql | 5 +-
Model/lib/psql/webtables/MO/Taxonomy.psql | 3 +-
Model/lib/psql/webtables/MO/Taxonomy_ix.psql | 8 +-
.../webtables/MO/TranscriptAttributes.psql | 1 +
.../webtables/MO/TranscriptAttributes_ix.psql | 120 ++++--------------
.../MO/TranscriptCenDistance_ix.psql | 8 +-
.../webtables/MO/TranscriptPathway_ix.psql | 26 ++--
.../webtables/MO/TranscriptSequence_ix.psql | 6 +-
.../webtables/MO/TransmembraneDomains_ix.psql | 4 -
47 files changed, 174 insertions(+), 461 deletions(-)
diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webtables/MG/OntologyLevels.psql
index 243ab0c1f1..47df6e98e2 100644
--- a/Model/lib/psql/webtables/MG/OntologyLevels.psql
+++ b/Model/lib/psql/webtables/MG/OntologyLevels.psql
@@ -30,4 +30,5 @@
GROUP BY ontology_term_id
;
-
+drop table :SCHEMA.Is_a_links;
+drop table :SCHEMA.Roots;
diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
index 44e43c5b10..cfe11ee5c6 100644
--- a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
+++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
@@ -1,7 +1,3 @@
-
-
- create index chpgene_geneid_idx ON ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id)
-
-
+ create index :ORG_ABBREVchpgene_geneid_idx ON :SCHEMA.:ORG_ABBREVChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id)
;
diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
index ab77efc977..d2d0448a29 100644
--- a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
+++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
@@ -1,16 +1,9 @@
-
-
- CREATE INDEX ChrCN_ix
- ON ChrCopyNumbers (input_pan_id, na_sequence_id)
-
-
+ CREATE INDEX :ORG_ABBREVChrCN_ix
+ ON :SCHEMA.:ORG_ABBREVChrCopyNumbers (input_pan_id, na_sequence_id)
;
-
- CREATE INDEX ChrCN_output
- ON ChrCopyNumbers (output_pan_id)
-
-
+ CREATE INDEX :ORG_ABBREVChrCN_output
+ ON :SCHEMA.:ORG_ABBREVChrCopyNumbers (output_pan_id)
;
diff --git a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
index 26d8f1c327..725030a983 100644
--- a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
@@ -1,7 +1,3 @@
-
-
- create index CodSeq_ix on CodingSequence (source_id, project_id)
-
-
+ create index :ORG_ABBREVCodSeq_ix on :SCHEMA.:ORG_ABBREVCodingSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
index ef659c7e2a..2a14698811 100644
--- a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
+++ b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
@@ -1,8 +1,4 @@
-
-
- create index eqtlSpan_ix
- on eqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score)
-
-
+ create index :ORG_ABBREVeqtlSpan_ix
+ on :SCHEMA.:ORG_ABBREVeqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score)
;
diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
index 1985445f0a..90b4788bce 100644
--- a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
+++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
@@ -66,3 +66,5 @@
:DECLARE_PARTITION;
+drop table EstAlignmentGene;
+drop table EstAlignmentNoGene;
\ No newline at end of file
diff --git a/Model/lib/psql/webtables/MO/EstSequence_ix.psql b/Model/lib/psql/webtables/MO/EstSequence_ix.psql
index b7010a62aa..de699f5486 100644
--- a/Model/lib/psql/webtables/MO/EstSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/EstSequence_ix.psql
@@ -1,7 +1,3 @@
-
-
create index EstSeq_ix on EstSequence (source_id, project_id)
-
-
;
diff --git a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
index a63551450d..27f307d5dc 100644
--- a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
@@ -1,90 +1,47 @@
-
-
- CREATE UNIQUE INDEX GeneAttr_srcPrj
- ON :ORG_ABBREVGeneAttributes (source_id)
-
-
+ CREATE UNIQUE INDEX :ORG_ABBREVGeneAttr_srcPrj
+ ON :SCHEMA.:ORG_ABBREVGeneAttributes (source_id)
;
-
-
- CREATE INDEX GeneAttr_exon_ix
- ON :ORG_ABBREVGeneAttributes (exon_count, source_id, project_id)
-
-
+ CREATE INDEX :ORG_ABBREVGeneAttr_exon_ix
+ ON :SCHEMA.:ORG_ABBREVGeneAttributes (exon_count, source_id, project_id)
;
-
-
- CREATE INDEX GeneAttr_loc_ix
- ON :ORG_ABBREVGeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated)
-
-
+ CREATE INDEX :ORG_ABBREVGeneAttr_loc_ix
+ ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated)
;
-
-
- CREATE INDEX GeneAttr_feat_ix
- ON :ORG_ABBREVGeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed)
-
-
+ CREATE INDEX :ORG_ABBREVGeneAttr_feat_ix
+ ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed)
;
-
-
- CREATE INDEX GeneAttr_orthoname_ix ON :ORG_ABBREVGeneAttributes (
+ CREATE INDEX :ORG_ABBREVGeneAttr_orthoname_ix ON :SCHEMA.:ORG_ABBREVGeneAttributes (
orthomcl_name, source_id, taxon_id, gene_type, na_feature_id,
na_sequence_id, start_min, end_max, organism, species,
product, project_id
)
-
;
-
-
- CREATE INDEX GeneAttr_ortholog_ix
- ON :ORG_ABBREVGeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id)
-
-
+ CREATE INDEX :ORG_ABBREVGeneAttr_ortholog_ix
+ ON :SCHEMA.:ORG_ABBREVGeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id)
;
-
-
- CREATE INDEX GeneAttr_orgsrc_ix
- ON :ORG_ABBREVGeneAttributes (organism, source_id, na_sequence_id, start_min, end_max)
-
-
+ CREATE INDEX :ORG_ABBREVGeneAttr_orgsrc_ix
+ ON :SCHEMA.:ORG_ABBREVGeneAttributes (organism, source_id, na_sequence_id, start_min, end_max)
;
-
-
- CREATE INDEX GeneAttr_prjsrc_ix
- ON :ORG_ABBREVGeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0))
-
-
+ CREATE INDEX :ORG_ABBREVGeneAttr_prjsrc_ix
+ ON :SCHEMA.:ORG_ABBREVGeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0))
;
-
-
- CREATE INDEX GeneAttr_txid_ix
- ON :ORG_ABBREVGeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id)
-
-
+ CREATE INDEX :ORG_ABBREVGeneAttr_txid_ix
+ ON :SCHEMA.:ORG_ABBREVGeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id)
;
-
-
- CREATE INDEX GeneAttr_ids_ix
- ON :ORG_ABBREVGeneAttributes (na_feature_id, source_id, project_id)
-
-
+ CREATE INDEX :ORG_ABBREVGeneAttr_ids_ix
+ ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_feature_id, source_id, project_id)
;
-
-
- CREATE INDEX GeneAttr_loc_intjunc_ix
- ON :ORG_ABBREVGeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX)
-
-
+ CREATE INDEX :ORG_ABBREVGeneAttr_loc_intjunc_ix
+ ON :SCHEMA.:ORG_ABBREVGeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX)
;
diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
index 7acf69d823..0108c24e21 100644
--- a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
@@ -1,8 +1,4 @@
-
-
- CREATE INDEX GeneCN_ix
- ON GeneCopyNumbers (input_pan_id, na_sequence_id)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVGeneCN_ix
+ ON :SCHEMA.:ORG_ABBREVGeneCopyNumbers (input_pan_id, na_sequence_id)
;
diff --git a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
index de449a0e54..50f4e8b3bb 100644
--- a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
@@ -1,9 +1,5 @@
-
-
- create index ggtab_ix ON GeneGoTable
+ create index :SCHEMA.:ORG_ABBREVggtab_ix ON :SCHEMA.:ORG_ABBREVGeneGoTable
(source_id, project_id, go_id, transcript_ids, is_not, go_term_name,
ontology, source, evidence_code, reference, evidence_code_parameter, sort_key)
-
-
;
diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql
index 8bb63eb7e4..8d6745ff93 100644
--- a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql
@@ -1,10 +1,6 @@
-
-
- create index ggt_ix ON :ORG_ABBREVGeneGoTerms
+ create index :SCHEMA.:ORG_ABBREVggt_ix ON :SCHEMA.:ORG_ABBREVGeneGoTerms -- index name carries the organism prefix so it cannot clash between organisms
(gene_source_id, transcript_source_id, ontology, go_id, go_term_id,
go_term_name, source, evidence_code, reference,
evidence_code_parameter, aa_sequence_id, is_not)
-
-
;
diff --git a/Model/lib/psql/webtables/MO/GeneId_ix.psql b/Model/lib/psql/webtables/MO/GeneId_ix.psql
index ef84acad08..55e4ca91c9 100644
--- a/Model/lib/psql/webtables/MO/GeneId_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneId_ix.psql
@@ -1,35 +1,15 @@
-
-
- CREATE INDEX GeneId_gene_idx ON :ORG_ABBREVGeneId (gene, id)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_gene_idx ON :SCHEMA.:ORG_ABBREVGeneId (gene, id)
;
-
-
- CREATE INDEX GeneId_id_idx ON :ORG_ABBREVGeneId (id, gene)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_id_idx ON :SCHEMA.:ORG_ABBREVGeneId (id, gene)
;
-
-
- CREATE INDEX GeneId_uniqid_idx ON :ORG_ABBREVGeneId (unique_mapping, id, gene)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_uniqid_idx ON :SCHEMA.:ORG_ABBREVGeneId (unique_mapping, id, gene)
;
-
-
- CREATE INDEX GeneId_lowid_idx ON :ORG_ABBREVGeneId (lower(id), gene)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_lowid_idx ON :SCHEMA.:ORG_ABBREVGeneId (lower(id), gene)
;
-
-
- CREATE INDEX GeneId_uniqlowid_idx ON :ORG_ABBREVGeneId (unique_mapping, lower(id), gene)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_uniqlowid_idx ON :SCHEMA.:ORG_ABBREVGeneId (unique_mapping, lower(id), gene)
;
diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
index d2d463f8a2..063e77a1c3 100644
--- a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
@@ -1,6 +1,3 @@
-
-
- create index GeneIntJuncStat_ix on GeneIntJuncStats (na_sequence_id)
-
+ create index :SCHEMA.:ORG_ABBREVGeneIntJuncStat_ix on :SCHEMA.:ORG_ABBREVGeneIntJuncStats (na_sequence_id)
;
diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
index 90f98ad9cf..6e650ac67c 100644
--- a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
@@ -1,19 +1,11 @@
-
-
- create index gijnew_loc_ix on GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
-
+ create index :SCHEMA.:ORG_ABBREVgijnew_loc_ix on :SCHEMA.:ORG_ABBREVGeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
;
-
-
-
- create index gijnew_gnscid_ix on GeneIntronJunction (intron_feature_id)
-
+
+ create index :SCHEMA.:ORG_ABBREVgijnew_gnscid_ix on :SCHEMA.:ORG_ABBREVGeneIntronJunction (intron_feature_id)
;
-
-
- create index gijnew_txnloc_ix
- on GeneIntronJunction
+ create index :SCHEMA.:ORG_ABBREVgijnew_txnloc_ix
+ on :SCHEMA.:ORG_ABBREVGeneIntronJunction
(taxon_id, na_sequence_id, segment_start, segment_end, is_reversed,
total_unique, total_isrpm, annotated_intron)
diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
index 80b09a0fa2..45fdf888d5 100644
--- a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
@@ -1,8 +1,4 @@
-
-
- create index gloc_ix
- on :ORG_ABBREVGeneLocations (source_id, locations)
-
-
+ create index :SCHEMA.:ORG_ABBREVgloc_ix -- per-organism index on the gene locations table
+ on :SCHEMA.:ORG_ABBREVGeneLocations (source_id, locations)
;
diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
index 2a734e98a7..0491ee072b 100644
--- a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
@@ -1,6 +1,3 @@
-
-
- CREATE INDEX GnMxIntGIJ_ix on GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id)
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVGnMxIntGIJ_ix on :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJ (gene_source_id,protocol_app_node_id)
;
diff --git a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
index 89fc3c5b8e..4fa21c53fa 100644
--- a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
@@ -1,9 +1,5 @@
-
-
- create index gmd_ix
- on GeneModelDump
+ create index :SCHEMA.:ORG_ABBREVgmd_ix
+ on :SCHEMA.:ORG_ABBREVGeneModelDump
(source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids)
-
-
;
diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
index 7465dc4dbb..e9b08230da 100644
--- a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
@@ -1,23 +1,11 @@
-
-
create unique index :SCHEMA.:ORG_ABBREVpk_SeqAttr_ ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (lower(source_id), project_id)
-
;
-
-
create unique index :SCHEMA.:ORG_ABBREVSeqAttr_source_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (source_id)
-
;
-
-
create unique index :SCHEMA.:ORG_ABBREVSeqAttr_naseqid ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (na_sequence_id)
-
;
-
-
create unique index :SCHEMA.:ORG_ABBREVSeqAttr_taxsrc_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (taxon_id, source_id)
-
;
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
index 7cf8dadc40..ee30a1c85f 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
@@ -1,5 +1,6 @@
:CREATE_AND_POPULATE
- SELECT DISTINCT substr(id, 1, 60) as id, substr(sequence, 1, 60) AS sequence
+ SELECT DISTINCT substr(id, 1, 60) as id, substr(sequence, 1, 60) AS sequence,
+ :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date
FROM (
SELECT ns.source_id as id, ns.source_id as sequence
FROM dots.NaSequence ns, sres.OntologyTerm oterm
@@ -19,7 +20,5 @@
= edr.external_database_release_id
AND edr.external_database_id = ed.external_database_id
) subquery1
-
-
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
index a1fa450bcc..c8b82387ac 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
@@ -1,21 +1,9 @@
-
-
- CREATE INDEX :SCHEMA.GenSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_sequence_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (sequence, id)
;
-
-
- CREATE INDEX :SCHEMA.GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_id_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (id, sequence)
;
-
-
- CREATE INDEX :SCHEMA.GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_lowid_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (lower(id), sequence)
;
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
index 7d28f93796..8cf38e086b 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
@@ -1,7 +1,3 @@
-
-
create index :SCHEMA.:ORG_ABBREVGenomicSeq_ix on :SCHEMA.:ORG_ABBREVGenomicSequenceSequence (source_id, project_id)
-
-
;
diff --git a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
index 55b2bbf76d..aff06120f1 100644
--- a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
+++ b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
@@ -1,16 +1,8 @@
-
-
- create index GoTermSum_aaSeqId_idx ON :ORG_ABBREVGoTermSummary (aa_sequence_id, go_id, source)
-
-
+ create index :SCHEMA.:ORG_ABBREVGoTermSum_aaSeqId_idx ON :SCHEMA.:ORG_ABBREVGoTermSummary (aa_sequence_id, go_id, source)
;
-
-
- create index GoTermSum_plugin_ix ON :ORG_ABBREVGoTermSummary
+ create index :SCHEMA.:ORG_ABBREVGoTermSum_plugin_ix ON :SCHEMA.:ORG_ABBREVGoTermSummary
(ontology, gene_source_id, is_not, is_go_slim,
go_id, go_term_name, evidence_code, evidence_category)
-
-
;
diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
index 0d38e419d9..e844e8da5b 100644
--- a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
+++ b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
@@ -1,16 +1,8 @@
-
-
- CREATE INDEX iuc_srcid_ix
- ON IntronUtrCoords (source_id, na_feature_id)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREViuc_srcid_ix
+ ON :SCHEMA.:ORG_ABBREVIntronUtrCoords (source_id, na_feature_id)
;
-
-
- CREATE INDEX iuc_nfid_ix
- ON IntronUtrCoords (na_feature_id, source_id)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREViuc_nfid_ix
+ ON :SCHEMA.:ORG_ABBREVIntronUtrCoords (na_feature_id, source_id)
;
diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
index 025dd1f8b3..6a25756c1e 100644
--- a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
+++ b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
@@ -1,6 +1,3 @@
-
-
- create index namemappinggij_ix on NameMappingGIJ (junctions_pan_id,exp_pan_id)
-
+ create index :SCHEMA.:ORG_ABBREVnamemappinggij_ix on :SCHEMA.:ORG_ABBREVNameMappingGIJ (junctions_pan_id,exp_pan_id)
;
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
index 97944361b3..5490dbfaad 100644
--- a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
@@ -1,6 +1,7 @@
:CREATE_AND_POPULATE
- select tn.name as organism, o.name_for_filenames
- from apidb.Organism o, sres.TaxonName tn
+ select tn.name as organism, o.name_for_filenames,
+ :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date
+ from apidb.Organism o, sres.TaxonName tn
where o.taxon_id = tn.taxon_id
and tn.name_class = 'scientific name'
and o.taxon_id = :TAXON_ID
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
index f098098f71..cee5ffe550 100644
--- a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
@@ -1,17 +1,18 @@
:CREATE_AND_POPULATE
- SELECT organism, parent, abbreviation, substr(project_id, 1, 20) as project_id
+ select sub.*, :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date
+ from (
+ SELECT organism, parent, abbreviation
FROM OrganismAbbreviationWS
UNION
-- all familes for popsets
SELECT DISTINCT family_name_for_files || ' Popset/Genbank Isolates' as organism, '' as parent,
- family_name_for_files as abbreviation, substr(project_name, 1, 20) as project_id
+ family_name_for_files as abbreviation
FROM apidb.Organism
WHERE family_name_for_files is not null
AND abbrev || '_isolates_genbank_RSRC' IN (SELECT external_db_name as db_name FROM PopsetAttributes)
AND family_name_for_files NOT IN ('Culicosporidae', 'Dubosqiidae', 'Ordosporidae')
UNION
- SELECT special.organism, special.parent, special.abbreviation,
- substr(ot.project_id, 1, 20) as project_id
+ SELECT special.organism, special.parent, special.abbreviation
FROM OrganismTree ot,
( -- all species and speciesAbbreviations from apidb.Organism where we have ests
SELECT DISTINCT
@@ -33,7 +34,7 @@
'Cryptosporidium' as parent, 'CryptosporidiidaeReference' as abbreviation
) special
WHERE special.parent = ot.term
-
+ ) sub
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
index 5414446561..1a38fed293 100644
--- a/Model/lib/psql/webtables/MO/OrganismAttributes.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
@@ -1,6 +1,6 @@
- CREATE UNLOGGED TABLE DataSourceCount AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVDataSourceCount AS
SELECT
taxon_id,
max(CASE WHEN stype = 'organellar' THEN num ELSE null END) as organellar_has,
@@ -42,7 +42,7 @@
- CREATE UNLOGGED TABLE OrganismCentromere AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVOrganismCentromere AS
SELECT distinct s.taxon_id,
case when count(*) > 0 then 1 else 0 end as hasCentromere
FROM DOTS.MISCELLANEOUS f
@@ -57,7 +57,7 @@
- CREATE UNLOGGED TABLE SequenceCount AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVSequenceCount AS
SELECT
taxon_id,
max(CASE WHEN sequence_type = 'contig' THEN num ELSE null END) as contig_num,
@@ -75,7 +75,7 @@
- CREATE UNLOGGED TABLE CommunityCount AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVCommunityCount AS
-- SELECT taxon_id, count(*) as communityCount
-- TODO: addd this back
select taxon_id, 0 as communityCount
@@ -94,7 +94,7 @@
- CREATE UNLOGGED TABLE ProfileCount AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProfileCount AS
SELECT ga.taxon_id,
count(distinct(case when p.dataset_type = 'transcript_expression'
and p.dataset_subtype like '%rt_pcr%'
@@ -122,7 +122,7 @@
- CREATE UNLOGGED TABLE PopsetCount AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVPopsetCount AS
SELECT count(distinct gene.source_id) as popsetCount, sim.taxon_id
FROM (
(SELECT i.source_id, nas.taxon_id, nas.source_id as sequence_source_id
@@ -154,7 +154,7 @@
- CREATE UNLOGGED TABLE GeneCount AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGeneCount AS
SELECT genomestat.taxon_id,
genomestat.project_id,
genomestat.database_version,
@@ -300,13 +300,13 @@
FROM apidb.Organism o
INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id
INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id
- LEFT JOIN DataSourceCount dsc ON o.taxon_id = dsc.taxon_id
- LEFT JOIN OrganismCentromere oc ON o.taxon_id = oc.taxon_id
- LEFT JOIN SequenceCount sc ON o.taxon_id = sc.taxon_id
- LEFT JOIN CommunityCount cc ON o.taxon_id = cc.taxon_id
- LEFT JOIN GeneCount gc ON o.taxon_id = gc.taxon_id
- LEFT JOIN popsetCount psc ON o.taxon_id = psc.taxon_id
- LEFT JOIN profileCount pc ON o.taxon_id = pc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVDataSourceCount dsc ON o.taxon_id = dsc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVOrganismCentromere oc ON o.taxon_id = oc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVSequenceCount sc ON o.taxon_id = sc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVCommunityCount cc ON o.taxon_id = cc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVGeneCount gc ON o.taxon_id = gc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVpopsetCount psc ON o.taxon_id = psc.taxon_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVprofileCount pc ON o.taxon_id = pc.taxon_id
LEFT JOIN (
SELECT taxon_id, round(avg(length),1) as avg_transcript_length
FROM TranscriptAttributes
@@ -325,3 +325,10 @@
:DECLARE_PARTITION;
+drop table :SCHEMA.:ORG_ABBREVDataSourceCount; -- cleanup of the UNLOGGED helper tables created earlier in this file and joined by the main select
+drop table :SCHEMA.:ORG_ABBREVOrganismCentromere;
+drop table :SCHEMA.:ORG_ABBREVProfileCount;
+drop table :SCHEMA.:ORG_ABBREVPopsetCount;
+drop table :SCHEMA.:ORG_ABBREVGeneCount;
+drop table :SCHEMA.:ORG_ABBREVSequenceCount;
+drop table :SCHEMA.:ORG_ABBREVCommunityCount;
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
index b40126a71e..27ab99b34a 100644
--- a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
@@ -1,7 +1,3 @@
-
-
-create unique index Organism_sourceId_idx ON OrganismAttributes (source_id)
-
-
+create unique index :SCHEMA.:ORG_ABBREVOrganism_sourceId_idx ON :SCHEMA.:ORG_ABBREVOrganismAttributes (source_id)
;
diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
index 75792a7940..d37233b9b2 100644
--- a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
+++ b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
@@ -26,7 +26,10 @@
AND tn1.taxon_id = o.taxon_id
)
SELECT
- organisms.organism
+ :PROJECT_ID as project_id
+ , :ORG_ABBREV as org_abbrev
+ , current_timestamp as modification_date
+ , organisms.organism
, organisms.public_abbrev
, coalesce(phylum.parent_organism, 'N/A') as phylum
, coalesce(genus.parent_organism, 'N/A') as genus
diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
index 776a8f5ba5..fe2988a30e 100644
--- a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
+++ b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
@@ -1,9 +1,6 @@
-
-
- create index pgt_ix on PathwaysGeneTable
+ create index :SCHEMA.:ORG_ABBREVpgt_ix on :SCHEMA.:ORG_ABBREVPathwaysGeneTable
(gene_source_id, project_id, pathway_source_id, pathway_name,
reactions, enzyme, expasy_url, pathway_source, exact_match)
-
;
diff --git a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
index f9f068e65a..82054a5477 100644
--- a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
+++ b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
@@ -1,22 +1,14 @@
-
-
- create index psamp_ix
- on ProfileSamples
+ create index :SCHEMA.:ORG_ABBREVpsamp_ix -- per-organism index on the profile samples table
+ on :SCHEMA.:ORG_ABBREVProfileSamples
(dataset_name, profile_type, study_id, node_order_num,
protocol_app_node_id, profile_set_suffix, study_name,
node_type, protocol_app_node_name)
-
-
;
-
-
- create index psampstdy_ix
- on ProfileSamples
+ create index :SCHEMA.:ORG_ABBREVpsampstdy_ix
+ on :SCHEMA.:ORG_ABBREVProfileSamples
(study_name, node_type, profile_type, node_order_num,
protocol_app_node_id, profile_set_suffix, study_id,
protocol_app_node_name, dataset_name)
-
-
;
diff --git a/Model/lib/psql/webtables/MO/Profile_ix.psql b/Model/lib/psql/webtables/MO/Profile_ix.psql
index 25c977a2e7..0839baa093 100644
--- a/Model/lib/psql/webtables/MO/Profile_ix.psql
+++ b/Model/lib/psql/webtables/MO/Profile_ix.psql
@@ -1,24 +1,12 @@
-
-
- create index exprof_idx
- on Profile (source_id, profile_type, profile_set_name)
-
-
+ create index :SCHEMA.:ORG_ABBREVexprof_idx
+ on :SCHEMA.:ORG_ABBREVProfile (source_id, profile_type, profile_set_name)
;
-
-
- create index profset_idx
- on Profile (profile_set_name, profile_type)
-
-
+ create index :SCHEMA.:ORG_ABBREVprofset_idx
+ on :SCHEMA.:ORG_ABBREVProfile (profile_set_name, profile_type)
;
-
-
- create index srcdset_idx
- on Profile (source_id, dataset_subtype, dataset_type)
-
-
+ create index :SCHEMA.:ORG_ABBREVsrcdset_idx
+ on :SCHEMA.:ORG_ABBREVProfile (source_id, dataset_subtype, dataset_type)
;
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webtables/MO/ProteinAttributes.psql
index de8bb59b53..b6c1dba255 100644
--- a/Model/lib/psql/webtables/MO/ProteinAttributes.psql
+++ b/Model/lib/psql/webtables/MO/ProteinAttributes.psql
@@ -1,6 +1,6 @@
- CREATE TABLE :ORG_ABBREVGoTermList AS
+ CREATE UNLOGGED TABLE :ORG_ABBREVGoTermList AS
SELECT aa_sequence_id, ontology, source,
string_agg(go_term_name, ';' ORDER BY go_term_name) AS go_terms,
string_agg(go_id, ';' ORDER BY go_term_name) AS go_ids
@@ -15,7 +15,7 @@
- CREATE TABLE :ORG_ABBREVProteinGoAttributes AS
+ CREATE UNLOGGED TABLE :ORG_ABBREVProteinGoAttributes AS
SELECT DISTINCT gts.aa_sequence_id,
substr(annotated_go_component.go_terms, 1, 300) AS annotated_go_component,
substr(annotated_go_function.go_terms, 1, 300) AS annotated_go_function,
@@ -85,7 +85,7 @@
- CREATE TABLE :ORG_ABBREVtProteinAttrsEc AS
+ CREATE UNLOGGED TABLE :ORG_ABBREVtProteinAttrsEc AS
SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers
FROM (SELECT DISTINCT asec.aa_sequence_id,
ec.ec_number || ' (' || ec.description || ')' AS ec_number
@@ -99,7 +99,7 @@
- CREATE TABLE :ORG_ABBREVtProteinAttrsEcDerived AS
+ CREATE UNLOGGED TABLE :ORG_ABBREVtProteinAttrsEcDerived AS
SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived
FROM (SELECT DISTINCT asec.aa_sequence_id,
ec.ec_number || ' (' || ec.description || ')' AS ec_number
@@ -205,3 +205,7 @@
;
+drop table :ORG_ABBREVGoTermList; -- cleanup of the UNLOGGED helper tables created earlier in this file
+drop table :ORG_ABBREVProteinGoAttributes;
+drop table :ORG_ABBREVtProteinAttrsEc;
+drop table :ORG_ABBREVtProteinAttrsEcDerived;
\ No newline at end of file
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
index f9899e7f80..9279d85729 100644
--- a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
@@ -1,14 +1,6 @@
-
-
- CREATE INDEX PA_sourceId ON :ORG_ABBREVProteinAttributes (source_id)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVPA_sourceId ON :SCHEMA.:ORG_ABBREVProteinAttributes (source_id)
;
-
-
-
- CREATE INDEX PA_aaSequenceId ON :ORG_ABBREVProteinAttributes (aa_sequence_id)
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVPA_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinAttributes (aa_sequence_id)
;
diff --git a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
index 98f1c06b2d..7f37d152ef 100644
--- a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
@@ -1,7 +1,3 @@
-
-
- create index ProtSeq_ix on ProteinSequence (source_id, project_id)
-
-
+ create index :SCHEMA.:ORG_ABBREVProtSeq_ix on :SCHEMA.:ORG_ABBREVProteinSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql
index 3993db26d3..a7fc7c39b4 100644
--- a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql
+++ b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql
@@ -1,5 +1,3 @@
-
-
CREATE TABLE :ORG_ABBREVSequencePieceClosure AS
SELECT sp.sequence_piece_id,
sp.virtual_na_sequence_id,
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
index d5b2c93f6c..ddd24897a3 100644
--- a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
+++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
@@ -1,16 +1,8 @@
-
-
CREATE INDEX :SCHEMA.:ORG_ABBREVSignalP1_ix
ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (aa_sequence_id)
-
-
;
-
-
CREATE INDEX :SCHEMA.:ORG_ABBREVSignalP2_ix
ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (gene_source_id, transcript_source_id, end_max)
-
-
;
diff --git a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
index c350a43aa8..7563dac0c5 100644
--- a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
+++ b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
@@ -1,14 +1,6 @@
-
-
- create index tfbs_geneid_idx ON TFBSGene (gene_source_id, tfbs_na_feature_id)
-
-
+ create index :SCHEMA.:ORG_ABBREVtfbs_geneid_idx ON :SCHEMA.:ORG_ABBREVTFBSGene (gene_source_id, tfbs_na_feature_id)
;
-
-
- create index geneid_tfbs_idx ON TFBSGene (tfbs_na_feature_id,gene_source_id)
-
-
+ create index :SCHEMA.:ORG_ABBREVgeneid_tfbs_idx ON :SCHEMA.:ORG_ABBREVTFBSGene (tfbs_na_feature_id,gene_source_id)
;
diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies.psql b/Model/lib/psql/webtables/MO/TaxonSpecies.psql
index 79a6023d27..d0aa9578a2 100644
--- a/Model/lib/psql/webtables/MO/TaxonSpecies.psql
+++ b/Model/lib/psql/webtables/MO/TaxonSpecies.psql
@@ -10,7 +10,10 @@
FROM cte, sres.taxon sub
WHERE cte.parent_id = sub.taxon_id
)
- SELECT c.taxon_id, c.parent_id as species_taxon_id
+ SELECT c.taxon_id, c.parent_id as species_taxon_id,
+ :PROJECT_ID as project_id,
+ :ORG_ABBREV as org_abbrev,
+ current_timestamp as modification_date
FROM cte c, sres.taxon t
WHERE t.taxon_id = c.parent_id
AND t.rank='species'
diff --git a/Model/lib/psql/webtables/MO/Taxonomy.psql b/Model/lib/psql/webtables/MO/Taxonomy.psql
index cbc2cbc0fa..8fe3bf229c 100644
--- a/Model/lib/psql/webtables/MO/Taxonomy.psql
+++ b/Model/lib/psql/webtables/MO/Taxonomy.psql
@@ -17,6 +17,7 @@
WHERE cte.parent_id = tax.taxon_id
AND tax.name != 'root'
)
- SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over() as orderNum
+ SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over() as orderNum,
+ :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, current_timestamp as modification_date
FROM (SELECT cte.* FROM cte ORDER BY path) t
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
index b5e2d3fb41..b3e8518849 100644
--- a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
+++ b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
@@ -1,9 +1,5 @@
-
-
- create index :SCHEMA.tax_ix
- on :SCHEMA.Taxonomy
+ create index :SCHEMA.:ORG_ABBREVtax_ix
+ on :SCHEMA.:ORG_ABBREVTaxonomy
(organism, ordernum, taxon_id, parent_id, ncbi_tax_id, name, rank)
-
-
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
index 388d722537..5ac1d5fd67 100644
--- a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
@@ -438,3 +438,4 @@
;
+drop table :ORG_ABBREVTranscriptUniprot;
\ No newline at end of file
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
index bcb9c3e57f..481959c6d7 100644
--- a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
@@ -1,135 +1,71 @@
-
-
- CREATE UNIQUE INDEX TranscriptAttr_sourceId
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_sourceId
- ON :ORG_ABBREVTranscriptAttributes (source_id)
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (source_id) -- table is schema-qualified so the statement does not depend on search_path
-
+ ;
- ;
-
-
-
- CREATE UNIQUE INDEX TranscriptAttr_srcPrj
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_srcPrj
- ON :ORG_ABBREVTranscriptAttributes (source_id, gene_source_id, project_id)
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (source_id, gene_source_id, project_id) -- table is schema-qualified so the statement does not depend on search_path
-
-
;
-
-
- CREATE UNIQUE INDEX TranscriptAttr_genesrc
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_genesrc
- ON :ORG_ABBREVTranscriptAttributes (gene_source_id, source_id, project_id)
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_source_id, source_id, project_id) -- table is schema-qualified so the statement does not depend on search_path
-
-
;
-
-
- CREATE UNIQUE INDEX TranscriptAttr_exon_ix
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_exon_ix
- ON :ORG_ABBREVTranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id)
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id) -- table is schema-qualified so the statement does not depend on search_path
-
-
;
-
-
- CREATE UNIQUE INDEX TranscriptAttr_loc_ix
- ON :ORG_ABBREVTranscriptAttributes
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_loc_ix
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes
(na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id,
is_deprecated, source_id, gene_source_id, project_id)
-
-
;
-
-
- CREATE UNIQUE INDEX TranscriptAttr_feat_ix
- ON :ORG_ABBREVTranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id)
-
-
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_feat_ix
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id)
;
-
-
- CREATE UNIQUE INDEX TranscriptAttr_geneid_ix
- ON :ORG_ABBREVTranscriptAttributes (gene_id, source_id, gene_source_id, project_id)
-
-
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_geneid_ix
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_id, source_id, gene_source_id, project_id)
;
-
-
- CREATE UNIQUE INDEX TransAttr_orthoname_ix
- ON :ORG_ABBREVTranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id)
-
-
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTransAttr_orthoname_ix
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id)
;
-
-
- CREATE UNIQUE INDEX TransAttr_molwt_ix
- ON :ORG_ABBREVTranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id)
-
-
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTransAttr_molwt_ix
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id)
;
-
-
- CREATE INDEX TransAttr_ortholog_ix
- ON :ORG_ABBREVTranscriptAttributes
+ CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_ortholog_ix
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes
(source_id, na_sequence_id, gene_start_min, gene_end_max, orthomcl_name, gene_source_id, project_id)
-
-
;
-
-
- CREATE INDEX TransAttr_orgsrc_ix
- ON :ORG_ABBREVTranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_orgsrc_ix
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max)
;
-
-
- CREATE INDEX TransAttr_lwrsrc_ix
- ON :ORG_ABBREVTranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_lwrsrc_ix
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id)
;
-
-
- CREATE INDEX TransAttr_species_ix
- ON :ORG_ABBREVTranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id)
-
-
+ CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_species_ix
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id)
;
-
-
- CREATE UNIQUE INDEX TrnscrptAttr_geneinfo
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTrnscrptAttr_geneinfo -- name carries the organism prefix so it cannot clash between organisms
- ON :ORG_ABBREVTranscriptAttributes
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes
(gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id,
protein_source_id, na_sequence_id, length, protein_length,
five_prime_utr_length, three_prime_utr_length)
-
-
;
-
-
- CREATE UNIQUE INDEX TranscriptAttr_genenaf
- ON :ORG_ABBREVTranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id)
-
-
+ CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_genenaf
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id)
;
-
-
- CREATE INDEX TransAttr_locsIds_ix
- ON :ORG_ABBREVTranscriptAttributes
+ CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_locsIds_ix
+ ON :SCHEMA.:ORG_ABBREVTranscriptAttributes
(na_sequence_id, start_min, end_max, is_reversed, gene_source_id, source_id, project_id)
-
-
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
index e6630dae0d..8b77d66964 100644
--- a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
@@ -1,8 +1,4 @@
-
-
- create index GCent_loc_ix
- on TranscriptCenDistance (genomic_sequence, centromere_distance)
-
-
+ create index :SCHEMA.:ORG_ABBREVGCent_loc_ix
+ on :SCHEMA.:ORG_ABBREVTranscriptCenDistance (genomic_sequence, centromere_distance)
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
index 7587695697..26773299a5 100644
--- a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
@@ -1,18 +1,12 @@
-
-
- create index TranscriptPath_ix
- on TranscriptPathway (gene_source_id, source_id, pathway_source_id,
- pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway,
- ec_number_pathway, pathway_source)
-
-
- ;
-
-
-
- create index TranscriptPathSource_ix
- on TranscriptPathway (pathway_source, gene_source_id, source_id)
-
-
+ create index :SCHEMA.:ORG_ABBREVTranscriptPath_ix
+ on :SCHEMA.:ORG_ABBREVTranscriptPathway
+ (gene_source_id, source_id, pathway_source_id,
+ pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway,
+ ec_number_pathway, pathway_source)
+ ;
+
+ create index :SCHEMA.:ORG_ABBREVTranscriptPathSource_ix
+ on :SCHEMA.:ORG_ABBREVTranscriptPathway (pathway_source,
+ gene_source_id, source_id)
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
index 079e8faf28..73f233d48e 100644
--- a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
@@ -1,7 +1,3 @@
-
-
- create index XScriptSeq_ix on :ORG_ABBREVTranscriptSequence (source_id, project_id)
-
-
+ create index :SCHEMA.:ORG_ABBREVXScriptSeq_ix on :SCHEMA.:ORG_ABBREVTranscriptSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
index f6aea03de7..e6d76b992a 100644
--- a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
+++ b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
@@ -1,8 +1,4 @@
-
-
create index :SCHEMA.:ORG_ABBREVTransDom1_ix
on :SCHEMA.:ORG_ABBREVTransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology)
-
-
;
From 37285ee751d7b1413e0f8570671f8cdb5d71ce3a Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Mon, 12 May 2025 17:19:30 -0400
Subject: [PATCH 007/112] wip
---
.../psql/webtables/MO/ProteinAttributes.psql | 67 +++++----
.../psql/webtables/MO/ProteinSequence.psql | 14 +-
.../webtables/MO/TranscriptAttributes.psql | 134 ++++++++++--------
Model/lib/xml/tuningManager/webtables.org | 8 +-
4 files changed, 119 insertions(+), 104 deletions(-)
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webtables/MO/ProteinAttributes.psql
index b6c1dba255..ad6c7cd95b 100644
--- a/Model/lib/psql/webtables/MO/ProteinAttributes.psql
+++ b/Model/lib/psql/webtables/MO/ProteinAttributes.psql
@@ -1,13 +1,15 @@
- CREATE UNLOGGED TABLE :ORG_ABBREVGoTermList AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGoTermList_tmp AS
SELECT aa_sequence_id, ontology, source,
string_agg(go_term_name, ';' ORDER BY go_term_name) AS go_terms,
string_agg(go_id, ';' ORDER BY go_term_name) AS go_ids
FROM (
SELECT aa_sequence_id, ontology,
CASE evidence_code WHEN 'IEA' THEN 'predicted' ELSE 'annotated' END AS source, go_term_name, go_id
- FROM :ORG_ABBREVGeneGoTerms
+ FROM :SCHEMA.GeneGoTerms
+ WHERE org_abbrev = ':ORG_ABBREV'
+
) t
GROUP BY aa_sequence_id, ontology, source
@@ -15,7 +17,7 @@
- CREATE UNLOGGED TABLE :ORG_ABBREVProteinGoAttributes AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp AS
SELECT DISTINCT gts.aa_sequence_id,
substr(annotated_go_component.go_terms, 1, 300) AS annotated_go_component,
substr(annotated_go_function.go_terms, 1, 300) AS annotated_go_function,
@@ -30,44 +32,44 @@
substr(predicted_go_function.go_ids, 1, 300) AS predicted_go_id_function,
substr(predicted_go_process.go_ids, 1, 300) AS predicted_go_id_process
FROM
- (SELECT DISTINCT aa_sequence_id FROM :ORG_ABBREVGoTermSummary) gts
+ (SELECT DISTINCT aa_sequence_id FROM :SCHEMA.GoTermSummary where org_abbrev = ':ORG_ABBREV') gts
LEFT JOIN (
- SELECT * FROM :ORG_ABBREVGoTermList
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
WHERE source = 'annotated' AND ontology = 'Cellular Component'
) annotated_go_component ON
gts.aa_sequence_id = annotated_go_component.aa_sequence_id
AND 'annotated' = annotated_go_component.source
AND 'Cellular Component' = annotated_go_component.ontology
LEFT JOIN (
- SELECT * FROM :ORG_ABBREVGoTermList
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
WHERE source = 'annotated' AND ontology = 'Molecular Function'
) annotated_go_function ON
gts.aa_sequence_id = annotated_go_function.aa_sequence_id
AND 'annotated' = annotated_go_function.source
AND 'Molecular Function' = annotated_go_function.ontology
LEFT JOIN (
- SELECT * FROM :ORG_ABBREVGoTermList
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
WHERE source = 'annotated' AND ontology = 'Biological Process'
) annotated_go_process ON
gts.aa_sequence_id = annotated_go_process.aa_sequence_id
AND 'annotated' = annotated_go_process.source
AND 'Biological Process' = annotated_go_process.ontology
LEFT JOIN (
- SELECT * FROM :ORG_ABBREVGoTermList
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
WHERE source = 'predicted' AND ontology = 'Cellular Component'
) predicted_go_component ON
gts.aa_sequence_id = predicted_go_component.aa_sequence_id
AND 'predicted' = predicted_go_component.source
AND 'Cellular Component' = predicted_go_component.ontology
LEFT JOIN (
- SELECT * FROM :ORG_ABBREVGoTermList
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
WHERE source = 'predicted' AND ontology = 'Molecular Function'
) predicted_go_function ON
gts.aa_sequence_id = predicted_go_function.aa_sequence_id
AND 'predicted' = predicted_go_function.source
AND 'Molecular Function' = predicted_go_function.ontology
LEFT JOIN (
- SELECT * FROM :ORG_ABBREVGoTermList
+ SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp
WHERE source = 'predicted' AND ontology = 'Biological Process'
) predicted_go_process ON
gts.aa_sequence_id = predicted_go_process.aa_sequence_id
@@ -78,19 +80,21 @@
- create index ProteinGoAttr_aaSequenceId ON :ORG_ABBREVProteinGoAttributes (aa_sequence_id)
+ create index :SCHEMA.:ORG_ABBREVProteinGoAttr_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id)
;
- CREATE UNLOGGED TABLE :ORG_ABBREVtProteinAttrsEc AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp AS
SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers
FROM (SELECT DISTINCT asec.aa_sequence_id,
ec.ec_number || ' (' || ec.description || ')' AS ec_number
- FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec
+ FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec, dots.aasequence seq
WHERE ec.enzyme_class_id = asec.enzyme_class_id
+ AND seq.aa_sequence_id = asec.aa_sequence_id
+ AND seq.taxon_id = :TAXON_ID
AND NOT asec.evidence_code = 'OrthoMCLDerived'
) t
GROUP BY aa_sequence_id
@@ -99,23 +103,26 @@
- CREATE UNLOGGED TABLE :ORG_ABBREVtProteinAttrsEcDerived AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp AS
SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived
FROM (SELECT DISTINCT asec.aa_sequence_id,
ec.ec_number || ' (' || ec.description || ')' AS ec_number
- FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec
+ FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec, dots.aasequence seq
WHERE ec.enzyme_class_id = asec.enzyme_class_id
+ AND seq.aa_sequence_id = asec.aa_sequence_id
+ AND seq.taxon_id = :TAXON_ID
AND asec.evidence_code = 'OrthoMCLDerived'
) t
GROUP BY aa_sequence_id
;
-:CREATE_AND_POPULATE
-
- CREATE TABLE :ORG_ABBREVProteinAttributes AS
- SELECT pi.name as project_id,
+-- TODO: Filter the subqueries or break into tmp tables for performance
+:CREATE_AND_POPULATE
+ SELECT ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
tas.source_id, tas.aa_sequence_id,
t.source_id as transcript_source_id,
gf.source_id as gene_source_id,
@@ -150,18 +157,18 @@
row_number() over (partition by t.source_id order by tas.length desc) as rank_in_transcript,
uniprot.uniprot_ids
FROM
- core.ProjectInfo pi
- INNER JOIN dots.Transcript t ON t.row_project_id = pi.project_id
+ dots.Transcript t -- no trailing comma: the INNER JOINs below extend this FROM item
INNER JOIN dots.GeneFeature gf ON gf.na_feature_id = t.parent_id
+ INNER JOIN dots.nasequence nas ON gf.na_sequence_id = nas.na_sequence_id AND nas.taxon_id = :TAXON_ID
INNER JOIN dots.TranslatedAaFeature taf ON t.na_feature_id = taf.na_feature_id
INNER JOIN dots.TranslatedAaSequence tas ON taf.aa_sequence_id = tas.aa_sequence_id
LEFT JOIN dots.RnaType rt2 ON gf.na_feature_id = rt2.parent_id
LEFT JOIN dots.RnaType rt1 ON t.na_feature_id = rt1.parent_id
LEFT JOIN apidb.AaSequenceAttribute asa ON taf.aa_sequence_id = asa.aa_sequence_id
- LEFT JOIN :ORG_ABBREVProteinGoAttributes go ON tas.aa_sequence_id = go.aa_sequence_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp go ON tas.aa_sequence_id = go.aa_sequence_id
LEFT JOIN (
SELECT aa_sequence_id, string_agg(peptide_sequence, ', ') peptide_sequence
- FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :ORG_ABBREVSignalPeptideDomains) t
+ FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :SCHEMA.:ORG_ABBREVSignalPeptideDomains) t
GROUP BY aa_sequence_id
) sigp ON tas.aa_sequence_id = sigp.aa_sequence_id
LEFT JOIN (
@@ -178,8 +185,8 @@
GROUP BY tmaf.aa_sequence_id) tms
GROUP BY tms.aa_sequence_id
) transmembrane ON tas.aa_sequence_id = transmembrane.aa_sequence_id
- LEFT JOIN :ORG_ABBREVProteinAttrsEc ec ON tas.aa_sequence_id = ec.aa_sequence_id
- LEFT JOIN :ORG_ABBREVProteinAttrsEcDerived ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp ec ON tas.aa_sequence_id = ec.aa_sequence_id -- name must match the tProteinAttrsEc_tmp table created (and later dropped) in this file
+ LEFT JOIN :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id
LEFT JOIN (
SELECT af.aa_sequence_id,
string_agg(dbref.primary_identifier, ',' order by dbref.primary_identifier) as uniprot_ids
@@ -199,13 +206,13 @@
- update :ORG_ABBREVProteinAttributes gaup
+ update :SCHEMA.:ORG_ABBREVProteinAttributes gaup
set has_seqedit = 1
where source_id in (select source_id from apidb.seqedit)
;
-drop table :ORG_ABBREVGoTermList;
-drop table :ORG_ABBREVProteinGoAttributes;
-drop table :ORG_ABBREVtProteinAttrsEc;
-drop table :ORG_ABBREVtProteinAttrsEcDerived;
\ No newline at end of file
+drop table :SCHEMA.:ORG_ABBREVGoTermList_tmp;
+drop table :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp;
+drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp;
+drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp;
diff --git a/Model/lib/psql/webtables/MO/ProteinSequence.psql b/Model/lib/psql/webtables/MO/ProteinSequence.psql
index 937e695a3f..9517f9436b 100644
--- a/Model/lib/psql/webtables/MO/ProteinSequence.psql
+++ b/Model/lib/psql/webtables/MO/ProteinSequence.psql
@@ -1,15 +1,9 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE ProteinSequence AS
WITH pAttr AS (
SELECT distinct source_id, aa_sequence_id
- FROM ProteinAttributes)
- SELECT pa.source_id, pi.name AS project_id, tas.sequence
- FROM pAttr pa, dots.TranslatedAaSequence tas, core.Projectinfo pi
+ FROM ProteinAttributes where org_abbrev = ':ORG_ABBREV')
+ SELECT pa.source_id, tas.sequence,
+ :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, current_timestamp as modification_date
+ FROM pAttr pa, dots.TranslatedAaSequence tas
WHERE pa.aa_sequence_id = tas.aa_sequence_id
- AND pi.project_id = tas.row_project_id
-
-
:DECLARE_PARTITION;
-
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
index 5ac1d5fd67..069c605c63 100644
--- a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
@@ -1,13 +1,17 @@
-
+-- Builds the :ORG_ABBREVTranscriptUniprot_tmp helper table, then populates TranscriptAttributes for :ORG_ABBREV.
- CREATE table :ORG_ABBREVTranscriptUniprot AS
+ CREATE table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp AS
select na_feature_id,
substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id,
substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal
FROM (SELECT DISTINCT t.na_feature_id, dr.primary_identifier as uniprot_id
FROM sres.DbRef dr, dots.DbRefNaFeature x, dots.Transcript t,
+ dots.genefeature gf, dots.nasequence nas, -- gf/nas are joined below to restrict transcripts to :TAXON_ID
sres.ExternalDatabase d, sres.ExternalDatabaseRelease r
WHERE dr.db_ref_id = x.DB_REF_ID
+ AND t.parent_id = gf.na_feature_id
+ and gf.na_sequence_id = nas.na_sequence_id
+ and nas.taxon_id = :TAXON_ID
AND (x.na_feature_id = t.na_feature_id --
or x.na_feature_id = t.parent_id)
AND dr.external_database_release_id = r.external_database_release_id
@@ -23,12 +27,8 @@
;
:CREATE_AND_POPULATE
-
-
- CREATE TABLE :ORG_ABBREVTranscriptAttributes AS
WITH genefeat AS (
SELECT DISTINCT
- cast(apidb.prefixed_project_id(tn.name, ':ORG_ABBREV') as varchar(20)) as project_id,
-- first the gene attributes:
gf.source_id AS gene_source_id,
gf.na_feature_id AS gene_na_feature_id,
@@ -50,12 +50,12 @@
GREATEST(1, least(nl.start_min, nl.end_max) - 1500) AS gene_zoom_context_start,
LEAST(gsa.length, greatest(nl.start_min, nl.end_max) + 1500) AS gene_zoom_context_end,
CAST(orthologs.name AS VARCHAR(60)) AS orthomcl_name,
- coalesce(tothtssnps.total_hts_snps,0) AS gene_total_hts_snps,
- coalesce(tothtssnps.hts_nonsynonymous_snps,0) AS gene_hts_nonsynonymous_snps,
- coalesce(tothtssnps.hts_stop_codon_snps,0) AS gene_hts_stop_codon_snps,
- coalesce(tothtssnps.hts_noncoding_snps,0) AS gene_hts_noncoding_snps,
- coalesce(tothtssnps.hts_synonymous_snps,0) AS gene_hts_synonymous_snps,
- coalesce(tothtssnps.hts_nonsyn_syn_ratio,0) AS gene_hts_nonsyn_syn_ratio,
+ -- coalesce(tothtssnps.total_hts_snps,0) AS gene_total_hts_snps,
+ -- coalesce(tothtssnps.hts_nonsynonymous_snps,0) AS gene_hts_nonsynonymous_snps,
+ -- coalesce(tothtssnps.hts_stop_codon_snps,0) AS gene_hts_stop_codon_snps,
+ -- coalesce(tothtssnps.hts_noncoding_snps,0) AS gene_hts_noncoding_snps,
+ -- coalesce(tothtssnps.hts_synonymous_snps,0) AS gene_hts_synonymous_snps,
+ -- coalesce(tothtssnps.hts_nonsyn_syn_ratio,0) AS gene_hts_nonsyn_syn_ratio,
CAST(cmnt.comment_string AS VARCHAR(300)) AS comment_string,
entrez_table.entrez_id AS gene_entrez_id,
gloc.locations AS gene_locations,
@@ -76,21 +76,22 @@
gsa.sequence_type,
gsa.chromosome_order_num, gsa.na_sequence_id
FROM dots.GeneFeature gf
+ INNER JOIN dots.nasequence seq ON seq.na_sequence_id = gf.na_sequence_id and seq.taxon_id = :TAXON_ID -- keep only genes whose sequence has taxon :TAXON_ID
INNER JOIN apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id
INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id
- INNER JOIN :ORG_ABBREVGeneLocations gloc ON gf.source_id = gloc.source_id
- LEFT JOIN :ORG_ABBREVGeneProduct gp ON gf.source_id = gp.source_id
+ INNER JOIN :SCHEMA.GeneLocations gloc ON gf.source_id = gloc.source_id and gloc.org_abbrev = ':ORG_ABBREV'
+ LEFT JOIN :SCHEMA.GeneProduct gp ON gf.source_id = gp.source_id and gp.org_abbrev = ':ORG_ABBREV'
INNER JOIN sres.ExternalDatabaseRelease edr ON gf.external_database_release_id = edr.external_database_release_id
INNER JOIN sres.ExternalDatabase ed ON edr.external_database_id = ed.external_database_id
- INNER JOIN :ORG_ABBREVGenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id
+ INNER JOIN :SCHEMA.GenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id and gsa.org_abbrev = ':ORG_ABBREV'
INNER JOIN sres.TaxonName tn ON gsa.taxon_id = tn.taxon_id
INNER JOIN sres.Taxon ON gsa.taxon_id = taxon.taxon_id
INNER JOIN sres.externalDatabaseRelease soRls ON so.external_database_release_id = soRls.external_database_release_id
INNER JOIN (
- SELECT DISTINCT gene AS source_id FROM :ORG_ABBREVGeneId
+ SELECT DISTINCT gene AS source_id FROM :SCHEMA.GeneId where org_abbrev = ':ORG_ABBREV'
) gene ON gf.source_id = gene.source_id
LEFT JOIN dots.RnaType rt2 ON gf.na_feature_id = rt2.parent_id
- LEFT JOIN :ORG_ABBREVTaxonSpecies ts ON gsa.taxon_id = ts.taxon_id
+ LEFT JOIN :SCHEMA.TaxonSpecies ts ON gsa.taxon_id = ts.taxon_id and ts.org_abbrev = ':ORG_ABBREV'
LEFT JOIN dots.geneinstance gi ON gf.na_feature_id = gi.na_feature_id
INNER JOIN sres.TaxonName species_name ON ts.species_taxon_id = species_name.taxon_id
LEFT JOIN (
@@ -116,23 +117,24 @@
AND edr.external_database_id = ed.external_database_id
AND ed.name = 'gassAWB_dbxref_gene2Deprecated_RSRC'
) deprecated ON gf.na_feature_id = deprecated.gene_na_feature_id
- LEFT JOIN (
- SELECT gene_source_id, total_hts_snps, hts_nonsynonymous_snps, hts_stop_codon_snps,hts_noncoding_snps,hts_synonymous_snps,
- case when (hts_nonsynonymous_snps is null) then 0
- when (hts_synonymous_snps = 0) then 0
- else round ((hts_nonsynonymous_snps/ hts_synonymous_snps), 2) end as hts_nonsyn_syn_ratio
- FROM (
- select gene_source_id,
- count(*) as total_hts_snps,
- sum(has_nonsynonymous_allele) as hts_nonsynonymous_snps,
- sum(has_stop_codon) as hts_stop_codon_snps,
- sum(is_noncoding_snp) as hts_noncoding_snps,
- count(*) - sum(has_nonsynonymous_allele) - sum(has_stop_codon) - sum(is_noncoding_snp) as hts_synonymous_snps
- FROM :ORG_ABBREVSnpAttributes
- WHERE gene_source_id is not null
- GROUP by gene_source_id
- ) t
- ) tothtssnps ON gf.source_id = tothtssnps.gene_source_id
+ -- NEED NEW TUNING TABLE ONCE NGS SNP WORK IS DONE
+ -- LEFT JOIN (
+ -- SELECT gene_source_id, total_hts_snps, hts_nonsynonymous_snps, hts_stop_codon_snps,hts_noncoding_snps,hts_synonymous_snps,
+ -- case when (hts_nonsynonymous_snps is null) then 0
+ -- when (hts_synonymous_snps = 0) then 0
+ -- else round ((hts_nonsynonymous_snps/ hts_synonymous_snps), 2) end as hts_nonsyn_syn_ratio
+ -- FROM (
+ -- select gene_source_id,
+ -- count(*) as total_hts_snps,
+ -- sum(has_nonsynonymous_allele) as hts_nonsynonymous_snps,
+ -- sum(has_stop_codon) as hts_stop_codon_snps,
+ -- sum(is_noncoding_snp) as hts_noncoding_snps,
+ -- count(*) - sum(has_nonsynonymous_allele) - sum(has_stop_codon) - sum(is_noncoding_snp) as hts_synonymous_snps
+ -- FROM :ORG_ABBREVSnpAttributes
+ -- WHERE gene_source_id is not null
+ -- GROUP by gene_source_id
+ -- ) t
+ -- ) tothtssnps ON gf.source_id = tothtssnps.gene_source_id
LEFT JOIN (
SELECT ssg.sequence_id as gene_na_feature_id, sg.name
FROM dots.SequenceSequenceGroup ssg,
@@ -191,6 +193,7 @@
GROUP BY dbna.na_feature_id
) entrez_table ON gf.na_feature_id = entrez_table.na_feature_id
LEFT JOIN (
+ -- TODO: PERFORMANCE
SELECT drnf.na_feature_id,
substr(string_agg(dr.primary_identifier, ';' order by dr.primary_identifier), 1, 900) as old_ids
FROM dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
@@ -208,7 +211,8 @@
AND (gf.is_predicted != 1 OR gf.is_predicted is null)
AND tn.name_class = 'scientific name'
AND tn.taxon_id NOT IN (SELECT o.taxon_id FROM apidb.Organism o WHERE o.is_annotated_genome=0)
- AND tn.name not in ('Plasmodium gallinaceum','Plasmodium reichenowi')
+ -- JB: NOT SURE WHY THESE ARE COMMENTED OUT. (if needed, move to model)
+ --AND tn.name not in ('Plasmodium gallinaceum','Plasmodium reichenowi')
), transcript AS (
SELECT DISTINCT
t.parent_id as transcript_parent_id,
@@ -233,6 +237,8 @@
utr_lengths.five_prime_utr_length,
utr_lengths.three_prime_utr_length
FROM dots.Transcript t
+ INNER JOIN dots.genefeature gf on t.parent_id = gf.na_feature_id
+ INNER JOIN dots.nasequence nas on gf.na_sequence_id = nas.na_sequence_id and nas.taxon_id = :TAXON_ID
LEFT JOIN dots.RnaType rt1 ON t.na_feature_id = rt1.parent_id
LEFT JOIN dots.SplicedNaSequence sns ON t.na_sequence_id = sns.na_sequence_id
INNER JOIN sres.OntologyTerm tso ON t.sequence_ontology_id = tso.ontology_term_id
@@ -256,7 +262,7 @@
GROUP BY parent_id
) three_prime ON t.na_feature_id = three_prime.parent_id
) utr_lengths ON t.na_feature_id = utr_lengths.transcript_na_feature_id
- LEFT JOIN :ORG_ABBREVTranscriptUniprot transcript_uniprot ON t.na_feature_id = transcript_uniprot.na_feature_id
+ LEFT JOIN :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp transcript_uniprot ON t.na_feature_id = transcript_uniprot.na_feature_id
LEFT JOIN (
SELECT na_feature_id, max(product) as product
FROM apidb.TranscriptProduct
@@ -272,7 +278,9 @@
tl.is_top_level=1
)
SELECT DISTINCT
- genefeat.project_id,
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
transcript.transcript_source_id AS source_id,
-- first the gene attributes:
genefeat.gene_source_id,
@@ -302,12 +310,12 @@
genefeat.gene_zoom_context_start,
genefeat.gene_zoom_context_end,
genefeat.orthomcl_name,
- genefeat.gene_total_hts_snps,
- genefeat.gene_hts_nonsynonymous_snps,
- genefeat.gene_hts_stop_codon_snps,
- genefeat.gene_hts_noncoding_snps,
- genefeat.gene_hts_synonymous_snps,
- genefeat.gene_hts_nonsyn_syn_ratio,
+ -- genefeat.gene_total_hts_snps,
+ -- genefeat.gene_hts_nonsynonymous_snps,
+ -- genefeat.gene_hts_stop_codon_snps,
+ -- genefeat.gene_hts_noncoding_snps,
+ -- genefeat.gene_hts_synonymous_snps,
+ -- genefeat.gene_hts_nonsyn_syn_ratio,
genefeat.comment_string,
transcript.uniprot_id, transcript.uniprot_id_internal,
genefeat.gene_entrez_id,
@@ -375,55 +383,60 @@
transcript.three_prime_utr_length
FROM genefeat
INNER JOIN transcript ON genefeat. gene_na_feature_id = transcript.transcript_parent_id
- LEFT JOIN :ORG_ABBREVproteinattributes pa ON transcript.transcript_source_id = pa.transcript_source_id AND pa.rank_in_transcript = 1
+ LEFT JOIN :SCHEMA.proteinattributes pa ON pa.org_abbrev = ':ORG_ABBREV' and transcript.transcript_source_id = pa.transcript_source_id AND pa.rank_in_transcript = 1
ORDER BY taxon_id, source_id
:DECLARE_PARTITION;
-
- UPDATE :ORG_ABBREVTranscriptAttributes ta
- SET exon_count = (SELECT count(*) + 1 FROM apidb.IntronLocation il WHERE il.parent_id = ta.na_feature_id AND il.end_max - il.start_min + 1 > 10 )
+ -- ADDED ORG_ABBREV filter in 2 places just in case
+ UPDATE :SCHEMA.TranscriptAttributes ta
+ SET exon_count = (SELECT count(*) + 1 FROM apidb.IntronLocation il WHERE ta.org_abbrev = ':ORG_ABBREV' AND il.parent_id = ta.na_feature_id AND il.end_max - il.start_min + 1 > 10 )
WHERE ta.project_id = 'TriTrypDB'
+ AND ta.org_abbrev = ':ORG_ABBREV'
;
- UPDATE :ORG_ABBREVTranscriptAttributes gaup
+ UPDATE :SCHEMA.TranscriptAttributes gaup
SET gene_paralog_number = (
SELECT count(distinct gene_source_id)
- FROM :ORG_ABBREVTranscriptAttributes g1
+ FROM :SCHEMA.TranscriptAttributes g1
WHERE g1.orthomcl_name = gaup.orthomcl_name
AND g1.organism = gaup.organism
AND gaup.gene_source_id != g1.gene_source_id
+ AND g1.org_abbrev = ':ORG_ABBREV'
),
gene_ortholog_number = (
SELECT count(distinct gene_source_id)
- FROM :ORG_ABBREVTranscriptAttributes g1
+ FROM :SCHEMA.TranscriptAttributes g1
WHERE g1.orthomcl_name = gaup.orthomcl_name
AND g1.organism != gaup.organism
+ -- NOTE(review): org_abbrev filter dropped here; with it, "organism != gaup.organism" presumably never matches (one organism per org_abbrev) - confirm
)
WHERE (gaup.gene_type = 'protein coding' or gaup.gene_type = 'protein coding gene')
+ and gaup.org_abbrev = ':ORG_ABBREV'
;
-
- UPDATE :ORG_ABBREVTranscriptAttributes
- SET gene_id = gene_na_feature_id + (select coalesce(max(gene_id), 0) from dots.gene)
+-- TODO: test this
+ UPDATE :SCHEMA.TranscriptAttributes
+ SET gene_id = gene_na_feature_id + (select nextval('dots.gene_sq'))
WHERE gene_id is null
+ and org_abbrev = ':ORG_ABBREV'
;
-
- UPDATE :ORG_ABBREVTranscriptAttributes
+ -- TODO: THIS IS BROKEN.
+ UPDATE :SCHEMA.TranscriptAttributes
SET representative_transcript = (
select min(source_id)
- from :ORG_ABBREVTranscriptAttributes ga
- where ga.gene_source_id = :ORG_ABBREVTranscriptAttributes .gene_source_id
+ from :SCHEMA.TranscriptAttributes ga
+ where ga.gene_source_id = :SCHEMA.TranscriptAttributes.gene_source_id
)
WHERE representative_transcript is null
AND gene_id is not null
@@ -431,11 +444,12 @@
;
-
+ -- TODO: THIS IS BROKEN.
UPDATE :ORG_ABBREVTranscriptAttributes
SET representative_transcript = source_id
WHERE representative_transcript is null
;
-
-drop table :ORG_ABBREVTranscriptUniprot
\ No newline at end of file
+ -- TODO: THIS IS BROKEN.
+drop table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp
+;
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index 1265054f2e..4373beaeb9 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -30,10 +30,10 @@
- [X] TransmembraneDomains.psql
- [X] PdbSimilarity_ix.psql
- [X] PdbSimilarity.psql
- - [ ] ProteinSequence_ix.psql
- - [ ] ProteinSequence.psql
- - [ ] ProteinAttributes_ix.psql
- - [ ] ProteinAttributes.psql
+ - [X] ProteinSequence_ix.psql
+ - [X] ProteinSequence.psql
+ - [X] ProteinAttributes_ix.psql
+ - [X] ProteinAttributes.psql
- [ ] TranscriptAttributes_ix.psql
- [ ] TranscriptAttributes.psql
- [ ] CodingSequence_ix.psql
From b758e7cf387c6e2b33b94dd412a2055e8eedc2a7 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 13 May 2025 11:26:36 -0400
Subject: [PATCH 008/112] fix quotes and update some tables
---
.../psql/webtables/MO/ChIPchipTranscript.psql | 11 ++++-------
Model/lib/psql/webtables/MO/CodingSequence.psql | 11 ++++-------
.../psql/webtables/MO/GenomicSeqAttributes.psql | 4 ++--
.../lib/psql/webtables/MO/GenomicSequenceId.psql | 2 +-
.../webtables/MO/GenomicSequenceSequence.psql | 4 ++--
.../psql/webtables/MO/OrganismAbbreviation.psql | 2 +-
.../webtables/MO/OrganismAbbreviationBlast.psql | 2 +-
.../webtables/MO/OrganismSelectTaxonRank.psql | 4 ++--
Model/lib/psql/webtables/MO/PdbSimilarity.psql | 4 ++--
Model/lib/psql/webtables/MO/ProteinSequence.psql | 2 +-
.../psql/webtables/MO/SignalPeptideDomains.psql | 4 ++--
Model/lib/psql/webtables/MO/TaxonSpecies.psql | 4 ++--
Model/lib/psql/webtables/MO/Taxonomy.psql | 2 +-
.../psql/webtables/MO/TranscriptCenDistance.psql | 13 ++++++-------
.../psql/webtables/MO/TranscriptSequence.psql | 10 +++-------
.../psql/webtables/MO/TransmembraneDomains.psql | 4 ++--
Model/lib/xml/tuningManager/webtables.org | 16 ++++++++--------
17 files changed, 44 insertions(+), 55 deletions(-)
diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql
index fe64a5fecf..04065d2b26 100644
--- a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql
+++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql
@@ -1,8 +1,6 @@
-:CREATE_AND_POPULATE
-
-
- CREATE TABLE ChIPchipTranscript AS
+ :CREATE_AND_POPULATE
SELECT DISTINCT ta.source_id, ta.gene_source_id, ta.project_id, sr.protocol_app_node_id,
+ ta.org_abbrev, current_timestamp as modification_date, -- project_id is already selected on the previous line
CASE
WHEN ta.is_reversed = 0
THEN round(abs(ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)),0)
@@ -28,7 +26,7 @@
END
END as direction,
sr.score1 as score
- FROM TranscriptAttributes ta,
+ FROM :SCHEMA.TranscriptAttributes ta,
Results.segmentresult sr,
Study.StudyLink sl,
Study.Study s
@@ -38,7 +36,6 @@
AND lower(s.name) like '%chip%peaks'
AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.start_min) <= 3000)
or (ta.is_reversed = 1 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.end_max) <= 3000) )
-
-
+ AND ta.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/CodingSequence.psql b/Model/lib/psql/webtables/MO/CodingSequence.psql
index e1560b855f..a1682717e9 100644
--- a/Model/lib/psql/webtables/MO/CodingSequence.psql
+++ b/Model/lib/psql/webtables/MO/CodingSequence.psql
@@ -1,14 +1,11 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE CodingSequence AS
- SELECT ta.source_id, ta.project_id,
+ SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ ta.source_id,
SUBSTR(sns.sequence, tf.translation_start::INTEGER,
tf.translation_stop::INTEGER - tf.translation_start::INTEGER + 1) as sequence
- FROM TranscriptAttributes ta, dots.SplicedNaSequence sns, dots.TranslatedAaFeature tf
+ FROM :SCHEMA.TranscriptAttributes ta, dots.SplicedNaSequence sns, dots.TranslatedAaFeature tf
WHERE ta.source_id = sns.source_id
AND ta.na_feature_id = tf.na_feature_id
-
-
+ AND ta.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql
index 6b964c4328..d00a1a5e8d 100644
--- a/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql
@@ -1,7 +1,7 @@
:CREATE_AND_POPULATE
SELECT
- :PROJECT_ID as project_id,
- :ORG_ABBREV as org_abbrev,
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
current_timestamp as modification_date,
SUBSTR(sequence.source_id, 1, 60) AS source_id, sequence.a_count,
sequence.c_count, sequence.g_count, sequence.t_count,
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
index ee30a1c85f..489b450430 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
@@ -1,6 +1,6 @@
:CREATE_AND_POPULATE
SELECT DISTINCT substr(id, 1, 60) as id, substr(sequence, 1, 60) AS sequence,
- :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, CURRENT_TIMESTAMP as modification_date
FROM (
SELECT ns.source_id as id, ns.source_id as sequence
FROM dots.NaSequence ns, sres.OntologyTerm oterm
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
index 86d8919c6b..bc1f31fb20 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
@@ -1,6 +1,6 @@
:CREATE_AND_POPULATE
- SELECT :PROJECT_ID as project_id,
- :ORG_ABBREV as org_abbrev,
+ SELECT ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
current_timestamp as modification_date,
sa.source_id,
ns.sequence
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
index 5490dbfaad..bcdedad216 100644
--- a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
@@ -1,6 +1,6 @@
:CREATE_AND_POPULATE
select tn.name as organism, o.name_for_filenames,
- :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, CURRENT_TIMESTAMP as modification_date
from apidb.Organism o, sres.TaxonName tn
where o.taxon_id = tn.taxon_id
and tn.name_class = 'scientific name'
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
index cee5ffe550..faf2286741 100644
--- a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
@@ -1,5 +1,5 @@
:CREATE_AND_POPULATE
- select sub.*, :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date
+ select sub.*, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, CURRENT_TIMESTAMP as modification_date
from (
SELECT organism, parent, abbreviation
FROM OrganismAbbreviationWS
diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
index d37233b9b2..34c531b5c8 100644
--- a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
+++ b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
@@ -26,8 +26,8 @@
AND tn1.taxon_id = o.taxon_id
)
SELECT
- :PROJECT_ID as project_id
- , :ORG_ABBREV as org_abbrev
+ ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
, current_timestamp as modification_date
, organisms.organism
, organisms.public_abbrev
diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity.psql b/Model/lib/psql/webtables/MO/PdbSimilarity.psql
index 7a2644ce2b..b3de1cfe9b 100644
--- a/Model/lib/psql/webtables/MO/PdbSimilarity.psql
+++ b/Model/lib/psql/webtables/MO/PdbSimilarity.psql
@@ -1,6 +1,6 @@
:CREATE_AND_POPULATE
- SELECT :PROJECT_ID as project_id,
- :ORG_ABBREV as org_abbrev,
+ SELECT ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
current_timestamp as modification_date,
ta.source_id, eas.source_id AS pdb_chain,
substr(eas.description, 1, 100) AS pdb_title,
diff --git a/Model/lib/psql/webtables/MO/ProteinSequence.psql b/Model/lib/psql/webtables/MO/ProteinSequence.psql
index 9517f9436b..68a8664412 100644
--- a/Model/lib/psql/webtables/MO/ProteinSequence.psql
+++ b/Model/lib/psql/webtables/MO/ProteinSequence.psql
@@ -3,7 +3,7 @@
SELECT distinct source_id, aa_sequence_id
FROM ProteinAttributes where org_abbrev = ':ORG_ABBREV')
SELECT pa.source_id, tas.sequence,
- :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, current_timestamp as modification_date
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM pAttr pa, dots.TranslatedAaSequence tas
WHERE pa.aa_sequence_id = tas.aa_sequence_id
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql
index 2ce374b972..a23b7ea12b 100644
--- a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql
+++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql
@@ -1,7 +1,7 @@
:CREATE_AND_POPULATE
SELECT
- :PROJECT_ID as project_id
- , :ORG_ABBREV as org_abbrev
+ ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
, current_timestamp as modification_date
, gf.source_id gene_source_id
, t.source_id transcript_source_id
diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies.psql b/Model/lib/psql/webtables/MO/TaxonSpecies.psql
index d0aa9578a2..b7d15cbbdc 100644
--- a/Model/lib/psql/webtables/MO/TaxonSpecies.psql
+++ b/Model/lib/psql/webtables/MO/TaxonSpecies.psql
@@ -11,8 +11,8 @@
WHERE cte.parent_id = sub.taxon_id
)
SELECT c.taxon_id, c.parent_id as species_taxon_id,
- :PROJECT_ID as project_id,
- :ORG_ABBREV as org_abbrev,
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
current_timestamp as modification_date
FROM cte c, sres.taxon t
WHERE t.taxon_id = c.parent_id
diff --git a/Model/lib/psql/webtables/MO/Taxonomy.psql b/Model/lib/psql/webtables/MO/Taxonomy.psql
index 8fe3bf229c..87f0a5c1d9 100644
--- a/Model/lib/psql/webtables/MO/Taxonomy.psql
+++ b/Model/lib/psql/webtables/MO/Taxonomy.psql
@@ -18,6 +18,6 @@
AND tax.name != 'root'
)
SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over() as orderNum,
- :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, current_timestamp as modification_date
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM (SELECT cte.* FROM cte ORDER BY path) t
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql
index 9715d5f08b..f1f9056005 100644
--- a/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql
@@ -1,19 +1,18 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE TranscriptCenDistance AS
- SELECT DISTINCT tl.feature_source_id AS transcript,
+ SELECT DISTINCT
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ tl.feature_source_id AS transcript,
LEAST(ABS(mfl.start_min - tl.end_max),
ABS(mfl.end_max - tl.start_min)) AS centromere_distance,
tl.sequence_source_id AS genomic_sequence
FROM apidb.TranscriptLocation tl, apidb.FeatureLocation mfl,
- sres.OntologyTerm so
+ dots.nasequence nas, sres.OntologyTerm so
WHERE tl.na_sequence_id = mfl.na_sequence_id
AND mfl.feature_type = 'Miscellaneous'
AND mfl.sequence_ontology_id = so.ontology_term_id
AND so.name = 'centromere'
AND tl.is_top_level = 1
-
-
+ and mfl.na_sequence_id = nas.na_sequence_id
+ and nas.taxon_id = :TAXON_ID
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence.psql b/Model/lib/psql/webtables/MO/TranscriptSequence.psql
index 0cc12e2ff2..edccc1d800 100644
--- a/Model/lib/psql/webtables/MO/TranscriptSequence.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptSequence.psql
@@ -1,11 +1,7 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE :ORG_ABBREVTranscriptSequence AS
- SELECT ta.source_id, ta.project_id, sns.sequence
- FROM :ORG_ABBREVTranscriptAttributes ta, dots.SplicedNaSequence sns
+ SELECT ta.source_id, ta.project_id, ta.org_abbrev, sns.sequence, current_timestamp as modification_date
+ FROM :SCHEMA.TranscriptAttributes ta, dots.SplicedNaSequence sns
WHERE ta.source_id = sns.source_id
-
-
+ AND ta.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql
index 99406037ee..d8915ea386 100644
--- a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql
+++ b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql
@@ -1,6 +1,6 @@
:CREATE_AND_POPULATE
- SELECT :PROJECT_ID as project_id
- , :ORG_ABBREV as org_abbrev
+ SELECT ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
, current_timestamp as modification_date
, ta.source_id as transcript_source_id
, ta.gene_source_id AS gene_source_id
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index 4373beaeb9..6373429173 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -36,18 +36,18 @@
- [X] ProteinAttributes.psql
- [ ] TranscriptAttributes_ix.psql
- [ ] TranscriptAttributes.psql
- - [ ] CodingSequence_ix.psql
- - [ ] CodingSequence.psql
+ - [s] CodingSequence_ix.psql
+ - [s] CodingSequence.psql
- [ ] IntronUtrCoords_ix.psql
- [ ] IntronUtrCoords.psql
- - [ ] TranscriptCenDistance_ix.psql
- - [ ] TranscriptCenDistance.psql
+ - [s] TranscriptCenDistance_ix.psql
+ - [s] TranscriptCenDistance.psql
- [ ] TranscriptPathway_ix.psql
- [ ] TranscriptPathway.psql
- - [ ] TranscriptSequence_ix.psql
- - [ ] TranscriptSequence.psql
- - [ ] ChIPchipTranscript_ix.psql
- - [ ] ChIPchipTranscript.psql
+ - [s] TranscriptSequence_ix.psql
+ - [s] TranscriptSequence.psql
+ - [s] ChIPchipTranscript_ix.psql
+ - [s] ChIPchipTranscript.psql
- Gene
- [ ] GeneId_ix.psql
From ce9d81e8ae45be7932cec71f3815c3914e93e4ca Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 13 May 2025 11:57:18 -0400
Subject: [PATCH 009/112] fix _ix files
---
.../webtables/MO/ChIPchipTranscript_ix.psql | 2 +-
.../psql/webtables/MO/ChrCopyNumbers_ix.psql | 8 +--
.../psql/webtables/MO/CodingSequence_ix.psql | 2 +-
Model/lib/psql/webtables/MO/EqtlSpan_ix.psql | 4 +-
.../psql/webtables/MO/GeneAttributes_ix.psql | 42 ++++++-------
.../psql/webtables/MO/GeneCopyNumbers_ix.psql | 4 +-
.../lib/psql/webtables/MO/GeneGoTable_ix.psql | 2 +-
.../lib/psql/webtables/MO/GeneGoTerms_ix.psql | 2 +-
Model/lib/psql/webtables/MO/GeneId_ix.psql | 10 +--
.../webtables/MO/GeneIntJuncStats_ix.psql | 2 +-
.../webtables/MO/GeneIntronJunction_ix.psql | 8 +--
.../psql/webtables/MO/GeneLocations_ix.psql | 4 +-
.../webtables/MO/GeneMaxIntronGIJ_ix.psql | 2 +-
.../psql/webtables/MO/GeneModelDump_ix.psql | 4 +-
.../webtables/MO/GenomicSeqAttributes_ix.psql | 8 +--
.../webtables/MO/GenomicSequenceId_ix.psql | 6 +-
.../MO/GenomicSequenceSequence_ix.psql | 2 +-
.../psql/webtables/MO/GoTermSummary_ix.psql | 4 +-
.../psql/webtables/MO/IntronUtrCoords_ix.psql | 8 +--
.../psql/webtables/MO/NameMappingGIJ_ix.psql | 2 +-
.../webtables/MO/OrganismAttributes_ix.psql | 2 +-
.../webtables/MO/PathwaysGeneTable_ix.psql | 2 +-
.../psql/webtables/MO/ProfileSamples_ix.psql | 8 +--
Model/lib/psql/webtables/MO/Profile_ix.psql | 12 ++--
.../webtables/MO/ProteinAttributes_ix.psql | 4 +-
.../psql/webtables/MO/ProteinSequence_ix.psql | 2 +-
.../webtables/MO/SignalPeptideDomains_ix.psql | 8 +--
Model/lib/psql/webtables/MO/TFBSGene_ix.psql | 4 +-
Model/lib/psql/webtables/MO/Taxonomy_ix.psql | 4 +-
.../webtables/MO/TranscriptAttributes_ix.psql | 62 +++++++++----------
.../MO/TranscriptCenDistance_ix.psql | 4 +-
.../webtables/MO/TranscriptPathway_ix.psql | 8 +--
.../webtables/MO/TranscriptSequence_ix.psql | 2 +-
.../webtables/MO/TransmembraneDomains_ix.psql | 4 +-
34 files changed, 126 insertions(+), 126 deletions(-)
diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
index cfe11ee5c6..bd8aaf4411 100644
--- a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
+++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.:ORG_ABBREVchpgene_geneid_idx ON :SCHEMA.:ORG_ABBREVChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id)
+ create index :SCHEMA.chpgene_geneid_idx ON :SCHEMA.ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id)
;
diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
index d2d0448a29..9d0e711555 100644
--- a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
+++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
@@ -1,9 +1,9 @@
- CREATE INDEX :SCHEMA.:ORG_ABBREVChrCN_ix
- ON :SCHEMA.:ORG_ABBREVChrCopyNumbers (input_pan_id, na_sequence_id)
+ CREATE INDEX :SCHEMA.ChrCN_ix
+ ON :SCHEMA.ChrCopyNumbers (input_pan_id, na_sequence_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVChrCN_output
- ON :SCHEMA.:ORG_ABBREVChrCopyNumbers (output_pan_id)
+ CREATE INDEX :SCHEMA.ChrCN_output
+ ON :SCHEMA.ChrCopyNumbers (output_pan_id)
;
diff --git a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
index 725030a983..7d3114121f 100644
--- a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.:ORG_ABBREVCodSeq_ix on :SCHEMA.:ORG_ABBREVCodingSequence (source_id, project_id)
+ create index :SCHEMA.CodSeq_ix on :SCHEMA.CodingSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
index 2a14698811..f4babf6466 100644
--- a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
+++ b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.:ORG_ABBREVeqtlSpan_ix
- on :SCHEMA.:ORG_ABBREVeqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score)
+ create index :SCHEMA.eqtlSpan_ix
+ on :SCHEMA.eqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score)
;
diff --git a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
index 27f307d5dc..bd524c4b7a 100644
--- a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
@@ -1,47 +1,47 @@
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_srcPrj
- ON :SCHEMA.:ORG_ABBREVGeneAttributes (source_id)
+ CREATE UNIQUE INDEX :SCHEMA.GeneAttr_srcPrj
+ ON :SCHEMA.GeneAttributes (source_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_exon_ix
- ON :SCHEMA.:ORG_ABBREVGeneAttributes (exon_count, source_id, project_id)
+ CREATE INDEX :SCHEMA.GeneAttr_exon_ix
+ ON :SCHEMA.GeneAttributes (exon_count, source_id, project_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_loc_ix
- ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated)
+ CREATE INDEX :SCHEMA.GeneAttr_loc_ix
+ ON :SCHEMA.GeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_feat_ix
- ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed)
+ CREATE INDEX :SCHEMA.GeneAttr_feat_ix
+ ON :SCHEMA.GeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_orthoname_ix ON :SCHEMA.:ORG_ABBREVGeneAttributes (
+ CREATE INDEX :SCHEMA.GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes (
orthomcl_name, source_id, taxon_id, gene_type, na_feature_id,
na_sequence_id, start_min, end_max, organism, species,
product, project_id
)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_ortholog_ix
- ON :SCHEMA.:ORG_ABBREVGeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id)
+ CREATE INDEX :SCHEMA.GeneAttr_ortholog_ix
+ ON :SCHEMA.GeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_orgsrc_ix
- ON :SCHEMA.:ORG_ABBREVGeneAttributes (organism, source_id, na_sequence_id, start_min, end_max)
+ CREATE INDEX :SCHEMA.GeneAttr_orgsrc_ix
+ ON :SCHEMA.GeneAttributes (organism, source_id, na_sequence_id, start_min, end_max)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_prjsrc_ix
- ON :SCHEMA.:ORG_ABBREVGeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0))
+ CREATE INDEX :SCHEMA.GeneAttr_prjsrc_ix
+ ON :SCHEMA.GeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0))
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_txid_ix
- ON :SCHEMA.:ORG_ABBREVGeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id)
+ CREATE INDEX :SCHEMA.GeneAttr_txid_ix
+ ON :SCHEMA.GeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_ids_ix
- ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_feature_id, source_id, project_id)
+ CREATE INDEX :SCHEMA.GeneAttr_ids_ix
+ ON :SCHEMA.GeneAttributes (na_feature_id, source_id, project_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_loc_intjunc_ix
- ON :SCHEMA.:ORG_ABBREVGeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX)
+ CREATE INDEX :SCHEMA.GeneAttr_loc_intjunc_ix
+ ON :SCHEMA.GeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX)
;
diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
index 0108c24e21..4cb38aaa7f 100644
--- a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
@@ -1,4 +1,4 @@
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneCN_ix
- ON :SCHEMA.:ORG_ABBREVGeneCopyNumbers (input_pan_id, na_sequence_id)
+ CREATE INDEX :SCHEMA.GeneCN_ix
+ ON :SCHEMA.GeneCopyNumbers (input_pan_id, na_sequence_id)
;
diff --git a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
index 50f4e8b3bb..a2ef5c49ec 100644
--- a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.:ORG_ABBREVggtab_ix ON :SCHEMA.:ORG_ABBREVGeneGoTable
+ create index :SCHEMA.ggtab_ix ON :SCHEMA.GeneGoTable
(source_id, project_id, go_id, transcript_ids, is_not, go_term_name,
ontology, source, evidence_code, reference, evidence_code_parameter, sort_key)
;
diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql
index 8d6745ff93..d8456c06e7 100644
--- a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql
@@ -1,4 +1,4 @@
- create index ggt_ix ON :SCHEMA.:ORG_ABBREVGeneGoTerms
+ create index ggt_ix ON :SCHEMA.GeneGoTerms
(gene_source_id, transcript_source_id, ontology, go_id, go_term_id,
go_term_name, source, evidence_code, reference,
evidence_code_parameter, aa_sequence_id, is_not)
diff --git a/Model/lib/psql/webtables/MO/GeneId_ix.psql b/Model/lib/psql/webtables/MO/GeneId_ix.psql
index 55e4ca91c9..2f2fd86202 100644
--- a/Model/lib/psql/webtables/MO/GeneId_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneId_ix.psql
@@ -1,15 +1,15 @@
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_gene_idx ON :SCHEMA.:ORG_ABBREVGeneId (gene, id)
+ CREATE INDEX :SCHEMA.GeneId_gene_idx ON :SCHEMA.GeneId (gene, id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_id_idx ON :SCHEMA.:ORG_ABBREVGeneId (id, gene)
+ CREATE INDEX :SCHEMA.GeneId_id_idx ON :SCHEMA.GeneId (id, gene)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_uniqid_idx ON :SCHEMA.:ORG_ABBREVGeneId (unique_mapping, id, gene)
+ CREATE INDEX :SCHEMA.GeneId_uniqid_idx ON :SCHEMA.GeneId (unique_mapping, id, gene)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_lowid_idx ON :SCHEMA.:ORG_ABBREVGeneId (lower(id), gene)
+ CREATE INDEX :SCHEMA.GeneId_lowid_idx ON :SCHEMA.GeneId (lower(id), gene)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_uniqlowid_idx ON :SCHEMA.:ORG_ABBREVGeneId (unique_mapping, lower(id), gene)
+ CREATE INDEX :SCHEMA.GeneId_uniqlowid_idx ON :SCHEMA.GeneId (unique_mapping, lower(id), gene)
;
diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
index 063e77a1c3..c295007639 100644
--- a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.:ORG_ABBREVGeneIntJuncStat_ix on :SCHEMA.:ORG_ABBREVGeneIntJuncStats (na_sequence_id)
+ create index :SCHEMA.GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id)
;
diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
index 6e650ac67c..14c8fdbd4a 100644
--- a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
@@ -1,11 +1,11 @@
- create index :SCHEMA.:ORG_ABBREVgijnew_loc_ix on :SCHEMA.:ORG_ABBREVGeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
+ create index :SCHEMA.gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
;
- create index :SCHEMA.:ORG_ABBREVgijnew_gnscid_ix on :SCHEMA.:ORG_ABBREVGeneIntronJunction (intron_feature_id)
+ create index :SCHEMA.gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id)
;
- create index :SCHEMA.:ORG_ABBREVgijnew_txnloc_ix
- on :SCHEMA.:ORG_ABBREVGeneIntronJunction
+ create index :SCHEMA.gijnew_txnloc_ix
+ on :SCHEMA.GeneIntronJunction
(taxon_id, na_sequence_id, segment_start, segment_end, is_reversed,
total_unique, total_isrpm, annotated_intron)
diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
index 45fdf888d5..135dfff694 100644
--- a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
@@ -1,4 +1,4 @@
- create :SCHEMA.:ORG_ABBREVindex gloc_ix
- on :SCHEMA.:ORG_ABBREVGeneLocations (source_id, locations)
+ create index gloc_ix /* unqualified: PostgreSQL creates an index in the schema of its table */
+ on :SCHEMA.GeneLocations (source_id, locations)
;
diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
index 0491ee072b..927c41fb89 100644
--- a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
@@ -1,3 +1,3 @@
- CREATE INDEX :SCHEMA.:ORG_ABBREVGnMxIntGIJ_ix on :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJ (gene_source_id,protocol_app_node_id)
+ CREATE INDEX :SCHEMA.GnMxIntGIJ_ix on :SCHEMA.GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id)
;
diff --git a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
index 4fa21c53fa..25fc836319 100644
--- a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
@@ -1,5 +1,5 @@
- create index :SCHEMA.:ORG_ABBREVgmd_ix
- on :SCHEMA.:ORG_ABBREVGeneModelDump
+ create index :SCHEMA.gmd_ix
+ on :SCHEMA.GeneModelDump
(source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids)
;
diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
index e9b08230da..709e8d2934 100644
--- a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
@@ -1,11 +1,11 @@
- create unique index :SCHEMA.:ORG_ABBREVpk_SeqAttr_ ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (lower(source_id), project_id)
+ create unique index :SCHEMA.pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id)
;
- create unique index :SCHEMA.:ORG_ABBREVSeqAttr_source_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (source_id)
+ create unique index :SCHEMA.SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id)
;
- create unique index :SCHEMA.:ORG_ABBREVSeqAttr_naseqid ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (na_sequence_id)
+ create unique index :SCHEMA.SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id)
;
- create unique index :SCHEMA.:ORG_ABBREVSeqAttr_taxsrc_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (taxon_id, source_id)
+ create unique index :SCHEMA.SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id)
;
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
index c8b82387ac..093edf9acb 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
@@ -1,9 +1,9 @@
- CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_sequence_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (sequence, id)
+ CREATE INDEX :SCHEMA.GenSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_id_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (id, sequence)
+ CREATE INDEX :SCHEMA.GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_lowid_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (lower(id), sequence)
+ CREATE INDEX :SCHEMA.GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence)
;
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
index 8cf38e086b..ea41f21285 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.:ORG_ABBREVGenomicSeq_ix on :SCHEMA.:ORG_ABBREVGenomicSequenceSequence (source_id, project_id)
+ create index :SCHEMA.GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
index aff06120f1..0b0ef12e90 100644
--- a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
+++ b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
@@ -1,7 +1,7 @@
- create index :SCHEMA.:ORG_ABBREVGoTermSum_aaSeqId_idx ON :SCHEMA.:ORG_ABBREVGoTermSummary (aa_sequence_id, go_id, source)
+ create index :SCHEMA.GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source)
;
- create index :SCHEMA.:ORG_ABBREVGoTermSum_plugin_ix ON :SCHEMA.:ORG_ABBREVGoTermSummary
+ create index :SCHEMA.GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary
(ontology, gene_source_id, is_not, is_go_slim,
go_id, go_term_name, evidence_code, evidence_category)
;
diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
index e844e8da5b..c1359297b9 100644
--- a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
+++ b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
@@ -1,8 +1,8 @@
- CREATE INDEX :SCHEMA.:ORG_ABBREViuc_srcid_ix
- ON :SCHEMA.:ORG_ABBREVIntronUtrCoords (source_id, na_feature_id)
+ CREATE INDEX :SCHEMA.iuc_srcid_ix
+ ON :SCHEMA.IntronUtrCoords (source_id, na_feature_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREViuc_nfid_ix
- ON :SCHEMA.:ORG_ABBREVIntronUtrCoords (na_feature_id, source_id)
+ CREATE INDEX :SCHEMA.iuc_nfid_ix
+ ON :SCHEMA.IntronUtrCoords (na_feature_id, source_id)
;
diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
index 6a25756c1e..443efe39c8 100644
--- a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
+++ b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.:ORG_ABBREVnamemappinggij_ix on :SCHEMA.:ORG_ABBREVNameMappingGIJ (junctions_pan_id,exp_pan_id)
+ create index :SCHEMA.namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id)
;
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
index 27ab99b34a..b018637f7d 100644
--- a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
@@ -1,3 +1,3 @@
-create unique index :SCHEMA.:ORG_ABBREVOrganism_sourceId_idx ON :SCHEMA.:ORG_ABBREVOrganismAttributes (source_id)
+create unique index :SCHEMA.Organism_sourceId_idx ON :SCHEMA.OrganismAttributes (source_id)
;
diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
index fe2988a30e..2cc01784ca 100644
--- a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
+++ b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.:ORG_ABBREVpgt_ix on :SCHEMA.:ORG_ABBREVPathwaysGeneTable
+ create index :SCHEMA.pgt_ix on :SCHEMA.PathwaysGeneTable
(gene_source_id, project_id, pathway_source_id, pathway_name,
reactions, enzyme, expasy_url, pathway_source, exact_match)
diff --git a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
index 82054a5477..fceecb4adf 100644
--- a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
+++ b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
@@ -1,12 +1,12 @@
- create :SCHEMA.:ORG_ABBREVindex psamp_ix
- on :SCHEMA.:ORG_ABBREVProfileSamples
+ create index psamp_ix /* unqualified: PostgreSQL creates an index in the schema of its table */
+ on :SCHEMA.ProfileSamples
(dataset_name, profile_type, study_id, node_order_num,
protocol_app_node_id, profile_set_suffix, study_name,
node_type, protocol_app_node_name)
;
- create index :SCHEMA.:ORG_ABBREVpsampstdy_ix
- on :SCHEMA.:ORG_ABBREVProfileSamples
+ create index :SCHEMA.psampstdy_ix
+ on :SCHEMA.ProfileSamples
(study_name, node_type, profile_type, node_order_num,
protocol_app_node_id, profile_set_suffix, study_id,
protocol_app_node_name, dataset_name)
diff --git a/Model/lib/psql/webtables/MO/Profile_ix.psql b/Model/lib/psql/webtables/MO/Profile_ix.psql
index 0839baa093..e4aebd5c31 100644
--- a/Model/lib/psql/webtables/MO/Profile_ix.psql
+++ b/Model/lib/psql/webtables/MO/Profile_ix.psql
@@ -1,12 +1,12 @@
- create index :SCHEMA.:ORG_ABBREVexprof_idx
- on :SCHEMA.:ORG_ABBREVProfile (source_id, profile_type, profile_set_name)
+ create index :SCHEMA.exprof_idx
+ on :SCHEMA.Profile (source_id, profile_type, profile_set_name)
;
- create index :SCHEMA.:ORG_ABBREVprofset_idx
- on :SCHEMA.:ORG_ABBREVProfile (profile_set_name, profile_type)
+ create index :SCHEMA.profset_idx
+ on :SCHEMA.Profile (profile_set_name, profile_type)
;
- create index :SCHEMA.:ORG_ABBREVsrcdset_idx
- on :SCHEMA.:ORG_ABBREVProfile (source_id, dataset_subtype, dataset_type)
+ create index :SCHEMA.srcdset_idx
+ on :SCHEMA.Profile (source_id, dataset_subtype, dataset_type)
;
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
index 9279d85729..2faff407d3 100644
--- a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
@@ -1,6 +1,6 @@
- CREATE INDEX :SCHEMA.:ORG_ABBREVPA_sourceId ON :SCHEMA.:ORG_ABBREVProteinAttributes (source_id)
+ CREATE INDEX :SCHEMA.PA_sourceId ON :SCHEMA.ProteinAttributes (source_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVPA_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinAttributes (aa_sequence_id)
+ CREATE INDEX :SCHEMA.PA_aaSequenceId ON :SCHEMA.ProteinAttributes (aa_sequence_id)
;
diff --git a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
index 7f37d152ef..a11708b620 100644
--- a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.:ORG_ABBREVProtSeq_ix on :SCHEMA._ORG_ABBREVProteinSequence (source_id, project_id)
+ create index ProtSeq_ix on :SCHEMA.ProteinSequence (source_id, project_id) /* index name unqualified: PostgreSQL creates it in the schema of its table */
;
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
index ddd24897a3..5cd5d15432 100644
--- a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
+++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
@@ -1,8 +1,8 @@
- CREATE INDEX :SCHEMA.:ORG_ABBREVSignalP1_ix
- ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (aa_sequence_id)
+ CREATE INDEX :SCHEMA.SignalP1_ix
+ ON :SCHEMA.SignalPeptideDomains (aa_sequence_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVSignalP2_ix
- ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (gene_source_id, transcript_source_id, end_max)
+ CREATE INDEX :SCHEMA.SignalP2_ix
+ ON :SCHEMA.SignalPeptideDomains (gene_source_id, transcript_source_id, end_max)
;
diff --git a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
index 7563dac0c5..5acc3e9f9b 100644
--- a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
+++ b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
@@ -1,6 +1,6 @@
- create index :SCHEMA.:ORG_ABBREVtfbs_geneid_idx ON :SCHEMA.:ORG_ABBREVTFBSGene (gene_source_id, tfbs_na_feature_id)
+ create index :SCHEMA.tfbs_geneid_idx ON :SCHEMA.TFBSGene (gene_source_id, tfbs_na_feature_id)
;
- create index :SCHEMA.:ORG_ABBREVgeneid_tfbs_idx ON :SCHEMA.:ORG_ABBREVTFBSGene (tfbs_na_feature_id,gene_source_id)
+ create index :SCHEMA.geneid_tfbs_idx ON :SCHEMA.TFBSGene (tfbs_na_feature_id,gene_source_id)
;
diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
index b3e8518849..7b29c4aa03 100644
--- a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
+++ b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
@@ -1,5 +1,5 @@
- create index :SCHEMA.:ORG_ABBREVtax_ix
- on :SCHEMA.:ORG_ABBREVTaxonomy
+ create index :SCHEMA.tax_ix
+ on :SCHEMA.Taxonomy
(organism, ordernum, taxon_id, parent_id, ncbi_tax_id, name, rank)
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
index 481959c6d7..08b54452aa 100644
--- a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
@@ -1,71 +1,71 @@
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_sourceId
- ON :ORG_ABBREVTranscriptAttributes (source_id)
+ CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_sourceId /* the four unique indexes below target the schema-qualified table, like the rest of this file */
+ ON :SCHEMA.TranscriptAttributes (source_id)
;
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_srcPrj
- ON :ORG_ABBREVTranscriptAttributes (source_id, gene_source_id, project_id)
+ CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_srcPrj
+ ON :SCHEMA.TranscriptAttributes (source_id, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_genesrc
- ON :ORG_ABBREVTranscriptAttributes (gene_source_id, source_id, project_id)
+ CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_genesrc
+ ON :SCHEMA.TranscriptAttributes (gene_source_id, source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_exon_ix
- ON :ORG_ABBREVTranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id)
+ CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_exon_ix
+ ON :SCHEMA.TranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_loc_ix
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes
+ CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_loc_ix
+ ON :SCHEMA.TranscriptAttributes
(na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id,
is_deprecated, source_id, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_feat_ix
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id)
+ CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_feat_ix
+ ON :SCHEMA.TranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_geneid_ix
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_id, source_id, gene_source_id, project_id)
+ CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_geneid_ix
+ ON :SCHEMA.TranscriptAttributes (gene_id, source_id, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTransAttr_orthoname_ix
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id)
+ CREATE UNIQUE INDEX :SCHEMA.TransAttr_orthoname_ix
+ ON :SCHEMA.TranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTransAttr_molwt_ix
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id)
+ CREATE UNIQUE INDEX :SCHEMA.TransAttr_molwt_ix
+ ON :SCHEMA.TranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_ortholog_ix
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes
+ CREATE INDEX :SCHEMA.TransAttr_ortholog_ix
+ ON :SCHEMA.TranscriptAttributes
(source_id, na_sequence_id, gene_start_min, gene_end_max, orthomcl_name, gene_source_id, project_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_orgsrc_ix
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max)
+ CREATE INDEX :SCHEMA.TransAttr_orgsrc_ix
+ ON :SCHEMA.TranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_lwrsrc_ix
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id)
+ CREATE INDEX :SCHEMA.TransAttr_lwrsrc_ix
+ ON :SCHEMA.TranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_species_ix
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id)
+ CREATE INDEX :SCHEMA.TransAttr_species_ix
+ ON :SCHEMA.TranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id)
;
CREATE UNIQUE INDEX TrnscrptAttr_geneinfo
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes
+ ON :SCHEMA.TranscriptAttributes
(gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id,
protein_source_id, na_sequence_id, length, protein_length,
five_prime_utr_length, three_prime_utr_length)
;
- CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_genenaf
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id)
+ CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_genenaf
+ ON :SCHEMA.TranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id)
;
- CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_locsIds_ix
- ON :SCHEMA.:ORG_ABBREVTranscriptAttributes
+ CREATE INDEX :SCHEMA.TransAttr_locsIds_ix
+ ON :SCHEMA.TranscriptAttributes
(na_sequence_id, start_min, end_max, is_reversed, gene_source_id, source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
index 8b77d66964..ad1c71a2fc 100644
--- a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.:ORG_ABBREVGCent_loc_ix
- on :SCHEMA.:ORG_ABBREVTranscriptCenDistance (genomic_sequence, centromere_distance)
+ create index :SCHEMA.GCent_loc_ix
+ on :SCHEMA.TranscriptCenDistance (genomic_sequence, centromere_distance)
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
index 26773299a5..2b05b42f44 100644
--- a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
@@ -1,12 +1,12 @@
- create index :SCHEMA.:ORG_ABBREVTranscriptPath_ix
- on :SCHEMA.:ORG_ABBREVTranscriptPathway
+ create index :SCHEMA.TranscriptPath_ix
+ on :SCHEMA.TranscriptPathway
(gene_source_id, source_id, pathway_source_id,
pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway,
ec_number_pathway, pathway_source)
;
- create index :SCHEMA.:ORG_ABBREVTranscriptPathSource_ix
- on :SCHEMA.:ORG_ABBREVTranscriptPathway (pathway_source,
+ create index :SCHEMA.TranscriptPathSource_ix
+ on :SCHEMA.TranscriptPathway (pathway_source,
gene_source_id, source_id)
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
index 73f233d48e..1e1b36617d 100644
--- a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.:ORG_ABBREVXScriptSeq_ix on :SCHEMA.:ORG_ABBREVTranscriptSequence (source_id, project_id)
+ create index :SCHEMA.XScriptSeq_ix on :SCHEMA.TranscriptSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
index e6d76b992a..613aff3e51 100644
--- a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
+++ b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.:ORG_ABBREVTransDom1_ix
- on :SCHEMA.:ORG_ABBREVTransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology)
+ create index :SCHEMA.TransDom1_ix
+ on :SCHEMA.TransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology)
;
From 577e9fd7c9d605e79717e47b92e6919479c33c6f Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Tue, 13 May 2025 14:16:07 -0400
Subject: [PATCH 010/112] wip
---
.../psql/webtables/MO/ChIPchipTranscript.psql | 6 +-
Model/lib/psql/webtables/MO/EqtlSpan.psql | 9 +-
.../webtables/MO/EstAlignmentGeneSummary.psql | 32 ++++---
.../lib/psql/webtables/MO/GeneAttributes.psql | 34 ++++---
.../psql/webtables/MO/GeneCopyNumbers.psql | 13 +--
Model/lib/psql/webtables/MO/GeneGoTable.psql | 21 ++--
Model/lib/psql/webtables/MO/GeneGoTerms.psql | 8 +-
Model/lib/psql/webtables/MO/GeneId.psql | 95 +++++++++++--------
.../lib/psql/webtables/MO/GeneLocations.psql | 8 +-
.../lib/psql/webtables/MO/GeneModelDump.psql | 13 +--
.../psql/webtables/MO/GeneSummaryFilter.psql | 11 +--
.../lib/psql/webtables/MO/GoTermSummary.psql | 16 ++--
.../psql/webtables/MO/IntronUtrCoords.psql | 13 +--
Model/lib/psql/webtables/MO/TFBSGene.psql | 11 ++-
.../psql/webtables/MO/TranscriptPathway.psql | 58 +++--------
Model/lib/xml/tuningManager/tablePruning.txt | 8 +-
Model/lib/xml/tuningManager/webtables.org | 83 +++++++++-------
17 files changed, 224 insertions(+), 215 deletions(-)
diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql
index 04065d2b26..4e1996000d 100644
--- a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql
+++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql
@@ -28,10 +28,10 @@
sr.score1 as score
FROM :SCHEMA.TranscriptAttributes ta,
Results.segmentresult sr,
- Study.StudyLink sl,
- Study.Study s
+ Study.nodenodeset sl,
+ Study.nodeset s
WHERE sr.na_sequence_id = ta.na_sequence_id
- AND s.study_id = sl.study_id
+ AND s.node_set_id = sl.node_set_id
AND sl.protocol_app_node_id = sr.protocol_app_node_id
AND lower(s.name) like '%chip%peaks'
AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.start_min) <= 3000)
diff --git a/Model/lib/psql/webtables/MO/EqtlSpan.psql b/Model/lib/psql/webtables/MO/EqtlSpan.psql
index 10215fca8e..fdad100be2 100644
--- a/Model/lib/psql/webtables/MO/EqtlSpan.psql
+++ b/Model/lib/psql/webtables/MO/EqtlSpan.psql
@@ -1,8 +1,6 @@
:CREATE_AND_POPULATE
-
-
- create table eqtlSpan as
- SELECT gene_source_id, project_id, haplotype_block_name as hapblock_id, sequence_id,
+ SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ gene_source_id, haplotype_block_name as hapblock_id, sequence_id,
start_min, end_max, start_max, end_min,
max(score) as lod_score, organism
FROM (SELECT ga.gene_source_id, ga.project_id, gls.haplotype_block_name,
@@ -10,11 +8,12 @@
gls.lod_score_mant * power(10::double precision, gls.lod_score_exp) as score,
replace (ga.organism, ' ', '+') as organism
FROM dots.ChromosomeElementFeature cef, apidb.NAFeatureHaploblock gls,
- dots.ExternalNaSequence ens, dots.NaLocation nl, TranscriptAttributes ga
+ dots.ExternalNaSequence ens, dots.NaLocation nl, :SCHEMA.TranscriptAttributes ga
WHERE gls.na_feature_id = ga.gene_na_feature_id
AND cef.name = gls.haplotype_block_name
AND nl.na_feature_id = cef.na_feature_id
AND cef.na_sequence_id = ens.na_sequence_id
+ AND ga.org_abbrev = ':ORG_ABBREV'
AND (gls.lod_score_mant * power(10::double precision, gls.lod_score_exp)) >= 1.5
) t
GROUP BY gene_source_id, project_id, sequence_id, haplotype_block_name,
diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
index 90b4788bce..4dbff1eca8 100644
--- a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
+++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
@@ -1,6 +1,6 @@
- CREATE UNLOGGED TABLE EstAlignmentGene AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp AS
SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession,
e.library_id, ba.query_taxon_id, ba.target_na_sequence_id,
ba.target_taxon_id, ba.percent_identity, ba.is_consistent,
@@ -11,21 +11,23 @@
AS est_gene_overlap_length,
ba.query_bases_aligned / (query_sequence.length)
* 100 AS percent_est_bases_aligned,
- ga.gene_source_id AS gene
+ ga.gene_source_id AS gene,':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM dots.BlatAlignment ba, dots.Est e,
- apidbtuning.TranscriptAttributes ga,
+ :SCHEMA.TranscriptAttributes ga,
dots.NaSequence query_sequence
WHERE e.na_sequence_id = ba.query_na_sequence_id
AND ga.na_sequence_id = ba.target_na_sequence_id
AND least(ba.target_end, ga.gene_end_max) - greatest(ba.target_start, ga.gene_start_min) >= 0
AND query_sequence.na_sequence_id = ba.query_na_sequence_id
+ AND ga.org_abbrev = ':ORG_ABBREV'
;
- CREATE UNLOGGED TABLE EstAlignmentNoGene AS
- SELECT * from EstAlignmentGene WHERE 1=0 UNION /* define datatype for null column */
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp AS
+ SELECT * from :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp WHERE 1=0 UNION /* define datatype for null column */
SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession,
e.library_id, ba.query_taxon_id, ba.target_na_sequence_id,
ba.target_taxon_id, ba.percent_identity, ba.is_consistent,
@@ -34,37 +36,39 @@
NULL AS est_gene_overlap_length,
ba.query_bases_aligned / (query_sequence.length)
* 100 AS percent_est_bases_aligned,
- NULL AS gene
+ NULL AS gene,':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM dots.BlatAlignment ba, dots.Est e, dots.AssemblySequence aseq,
dots.NaSequence sequence, dots.NaSequence query_sequence
WHERE e.na_sequence_id = ba.query_na_sequence_id
AND e.na_sequence_id = query_sequence.na_sequence_id
AND aseq.na_sequence_id = ba.query_na_sequence_id
AND ba.target_na_sequence_id = sequence.na_sequence_id
+ AND sequence.taxon_id = :TAXON_ID
AND ba.blat_alignment_id IN
( /* set of blat_alignment_ids not in in first leg of UNION */
/* (because they overlap no genes) */
SELECT ba.blat_alignment_id
FROM dots.BlatAlignment ba, dots.NaSequence query_sequence,
- sres.OntologyTerm so
+ sres.OntologyTerm so, dots.NaSequence target_sequence
WHERE query_sequence.na_sequence_id = ba.query_na_sequence_id
AND query_sequence.sequence_ontology_id = so.ontology_term_id
+ AND ba.target_na_sequence_id = target_sequence.na_sequence_id /* target_sequence is joined only to filter by taxon */
+ AND target_sequence.taxon_id = :TAXON_ID
AND so.name = 'EST'
EXCEPT
- SELECT blat_alignment_id FROM EstAlignmentGene)
+ SELECT blat_alignment_id FROM :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp)
;
:CREATE_AND_POPULATE
-
- CREATE TABLE EstAlignmentGeneSummary AS
- SELECT * FROM EstAlignmentNoGene
+ SELECT * FROM :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp
UNION
- SELECT * FROM EstAlignmentGene
+ SELECT * FROM :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp
:DECLARE_PARTITION;
-drop table EstAlignmentGene;
-drop table EstAlignmentNoGene;
\ No newline at end of file
+drop table :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp;
+drop table :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp;
diff --git a/Model/lib/psql/webtables/MO/GeneAttributes.psql b/Model/lib/psql/webtables/MO/GeneAttributes.psql
index 8c78e847c9..490b20f223 100644
--- a/Model/lib/psql/webtables/MO/GeneAttributes.psql
+++ b/Model/lib/psql/webtables/MO/GeneAttributes.psql
@@ -1,8 +1,7 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE :ORG_ABBREVGeneAttributes AS
- SELECT DISTINCT project_id
+ SELECT DISTINCT ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
, ta.gene_source_id AS source_id
, gene_na_feature_id AS na_feature_id
, na_sequence_id
@@ -59,11 +58,12 @@
, gene_zoom_context_end as zoom_context_end
, cast (null as numeric) as strain_count
, ta.gene_locations as locations
- FROM :ORG_ABBREVTranscriptAttributes ta
+ FROM :SCHEMA.TranscriptAttributes ta
INNER JOIN (
SELECT gene_source_id, MIN(is_pseudo) AS is_pseudo, MIN(gene_product) AS product,
substr(STRING_AGG(transcript_product, ',' order by transcript_product), 1, 240) as transcript_product
- FROM :ORG_ABBREVTranscriptAttributes
+ FROM :SCHEMA.TranscriptAttributes
+ WHERE org_abbrev = ':ORG_ABBREV'
GROUP BY gene_source_id
) aggregates ON ta.gene_source_id = aggregates.gene_source_id
LEFT JOIN (
@@ -71,11 +71,12 @@
substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id,
substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal
FROM (
- SELECT distinct t.parent_id as na_feature_id, dr.primary_identifier as uniprot_id
- FROM sres.DbRef dr, dots.DbRefNaFeature x, dots.Transcript t,
+ SELECT distinct t.gene_na_feature_id as na_feature_id, dr.primary_identifier as uniprot_id
+ FROM sres.DbRef dr, dots.DbRefNaFeature x, :SCHEMA.Transcriptattributes t,
sres.ExternalDatabase d, sres.ExternalDatabaseRelease r
WHERE dr.db_ref_id = x.DB_REF_ID
- AND (x.na_feature_id = t.na_feature_id OR x.na_feature_id = t.parent_id)
+ AND t.org_abbrev = ':ORG_ABBREV'
+ AND (x.na_feature_id = t.na_feature_id OR x.na_feature_id = t.gene_na_feature_id)
AND dr.external_database_release_id = r.external_database_release_id
AND r.external_database_id = d.external_database_id
AND (d.name like '%uniprot_dbxref_RSRC'
@@ -86,28 +87,31 @@
GROUP BY na_feature_id
) uniprot ON ta.gene_na_feature_id = uniprot.na_feature_id
LEFT JOIN :ORG_ABBREVGeneProduct gp ON ta.gene_source_id = gp.source_id
+ WHERE ta.org_abbrev = ':ORG_ABBREV'
ORDER BY ta.gene_source_id
-
-
:DECLARE_PARTITION;
- CREATE TABLE :ORG_ABBREVSpeciesInfo as
+ CREATE unlogged TABLE :SCHEMA.:ORG_ABBREVSpeciesInfoTmp as /* scratch table: read by the UPDATE below, then dropped */
SELECT genus_species, count(distinct organism) as strain_count
- FROM :ORG_ABBREVGeneAttributes
+ FROM :SCHEMA.GeneAttributes
+ WHERE org_abbrev = ':ORG_ABBREV'
GROUP BY genus_species
;
- UPDATE :ORG_ABBREVGeneAttributes ga
+ UPDATE :SCHEMA.GeneAttributes ga
SET strain_count = (
SELECT strain_count
- FROM :ORG_ABBREVSpeciesInfo si
+ FROM :SCHEMA.:ORG_ABBREVSpeciesInfoTmp si
WHERE si.genus_species = ga.genus_species
)
+ WHERE org_abbrev = ':ORG_ABBREV'
;
+drop table :SCHEMA.:ORG_ABBREVSpeciesInfoTmp
+;
diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql
index ac518347bf..fab9996a93 100644
--- a/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql
+++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql
@@ -1,8 +1,7 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE GeneCopyNumbers AS
- SELECT DISTINCT ta.project_id
+ SELECT DISTINCT ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
, ta.source_id
, ta.gene_source_id
, REGEXP_REPLACE(pan.name, '_[A-Za-z0-9]+ (.+)$', '') AS strain
@@ -17,12 +16,14 @@
, io.output_pan_id
FROM apidb.genecopynumber gcn
, study.protocolappnode pan
- , TranscriptAttributes ta
- , PANIo io
+ , :SCHEMA.TranscriptAttributes ta
+ , :SCHEMA.PANIo io
WHERE gcn.protocol_app_node_id = pan.protocol_app_node_id
AND gcn.na_feature_id = ta.gene_na_feature_id
AND gcn.protocol_app_node_id = io.output_pan_id
AND (ta.gene_type = 'protein coding' or ta.gene_type = 'protein coding gene')
+ AND ta.org_abbrev = ':ORG_ABBREV'
+ AND io.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/GeneGoTable.psql b/Model/lib/psql/webtables/MO/GeneGoTable.psql
index b83c188e7f..25fa844910 100644
--- a/Model/lib/psql/webtables/MO/GeneGoTable.psql
+++ b/Model/lib/psql/webtables/MO/GeneGoTable.psql
@@ -1,12 +1,13 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE GeneGoTable AS
- SELECT source_id, project_id, go_id,
- string_agg(transcript_source_id, ', ' order by transcript_source_id) as transcript_ids,
- is_not,
- max(go_term_name) as go_term_name, ontology, source, evidence_code,
- reference, evidence_code_parameter, sort_key
+ SELECT source_id,
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
+ go_id,
+ string_agg(transcript_source_id, ', ' order by transcript_source_id) as transcript_ids,
+ is_not,
+ max(go_term_name) as go_term_name, ontology, source, evidence_code,
+ reference, evidence_code_parameter, sort_key
FROM (SELECT DISTINCT ggt.gene_source_id as source_id, ga.project_id,
replace(ggt.go_id, 'GO_', 'GO:') as go_id,
ggt.transcript_source_id,
@@ -14,8 +15,10 @@
ggt.go_term_name, ggt.ontology, ggt.source, ggt.evidence_code,
ggt.reference, ggt.evidence_code_parameter,
substr(ggt.ontology, 1, 1) || replace(ggt.go_id, 'GO_', 'GO:') as sort_key
- FROM GeneGoTerms ggt, GeneAttributes ga
+ FROM :SCHEMA.GeneGoTerms ggt, :SCHEMA.GeneAttributes ga
WHERE ggt.gene_source_id = ga.source_id
+ and ggt.org_abbrev = ':ORG_ABBREV'
+ and ga.org_abbrev = ':ORG_ABBREV'
) t
GROUP BY source_id, project_id, go_id, is_not, ontology,
source, evidence_code, reference, evidence_code_parameter, sort_key
diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms.psql b/Model/lib/psql/webtables/MO/GeneGoTerms.psql
index 4cd39239a2..87fec450b8 100644
--- a/Model/lib/psql/webtables/MO/GeneGoTerms.psql
+++ b/Model/lib/psql/webtables/MO/GeneGoTerms.psql
@@ -1,13 +1,11 @@
:CREATE_AND_POPULATE
-
-
- create table :ORG_ABBREVGeneGoTerms as
with root_term
as (select ontology_term_id,
cast(initcap(replace(name, '_', ' ')) as varchar(20)) as ontology
from sres.OntologyTerm
where source_id in ('GO_0008150','GO_0003674','GO_0005575'))
- select gf.source_id as gene_source_id, t.source_id as transcript_source_id, taf.aa_sequence_id,
+ select ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ gf.source_id as gene_source_id, t.source_id as transcript_source_id, taf.aa_sequence_id,
cast (CASE ga.is_not WHEN 0 THEN '' WHEN 1 THEN 'not' ELSE ga.is_not::varchar END as varchar(3)) as is_not, ns.taxon_id,
cast (gt.source_id as varchar(20)) as go_id,
gt.ontology_term_id as go_term_id, rt.ontology,
@@ -22,7 +20,7 @@
sres.OntologyTerm gt LEFT JOIN root_term rt ON gt.ancestor_term_id = rt.ontology_term_id
where t.parent_id = gf.na_feature_id
and gf.na_sequence_id = ns.na_sequence_id
- and (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0)
+ and ns.taxon_id = :TAXON_ID
and t.na_feature_id = taf.na_feature_id
and taf.aa_sequence_id = ga.row_id
and ga.table_id = (select table_id
diff --git a/Model/lib/psql/webtables/MO/GeneId.psql b/Model/lib/psql/webtables/MO/GeneId.psql
index 41e89c83f0..7feaf7ebc2 100644
--- a/Model/lib/psql/webtables/MO/GeneId.psql
+++ b/Model/lib/psql/webtables/MO/GeneId.psql
@@ -1,14 +1,29 @@
-:CREATE_AND_POPULATE
+create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureTmp as /* scratch copy of the dots.genefeature columns used by the queries below, limited to one taxon */
+(select gf.na_feature_id
+ , gf.na_sequence_id
+ , gf.external_database_release_id
+ , gf.is_predicted
+ , gf.source_id
+ from dots.genefeature gf
+ , dots.nasequence nas
+ where gf.na_sequence_id = nas.na_sequence_id /* nasequence is joined only to reach taxon_id */
+ and nas.taxon_id = :TAXON_ID
+)
+;
-
- CREATE TABLE :ORG_ABBREVGeneId AS
+create index :SCHEMA.:ORG_ABBREVGeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id)
+;
+
+
+:CREATE_AND_POPULATE
SELECT substr(mapping.id, 1, 100) as id, mapping.gene, cast (0 as NUMERIC(1)) as unique_mapping,
SUBSTR(string_agg(distinct union_member,'; ' order by union_member), 1, 100) as union_member,
- SUBSTR(string_agg(distinct database_name,'; ' order by database_name), 1, 200) as database_name
+ SUBSTR(string_agg(distinct database_name,'; ' order by database_name), 1, 200) as database_name,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM (SELECT substr(t.protein_id, 1, nullif(position('.' IN t.protein_id) - 1, -1)) AS id,
gf.source_id AS gene,
'Transcript.protein_id before dot' as union_member, ed.name as database_name /* dots.Transcript.protein_id, trimmed at period */
- FROM dots.Transcript t, dots.GeneFeature gf,
+ FROM dots.Transcript t, :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf,
sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
WHERE t.parent_id = gf.na_feature_id
AND substr(t.protein_id, 1, nullif(position('.' IN t.protein_id) - 1, -1)) IS NOT NULL
@@ -18,7 +33,7 @@
SELECT t.protein_id AS id,
gf.source_id AS gene,
'Transcript.protein_id' as union_member, ed.name as database_name /* dots.Transcript.protein_id */
- FROM dots.Transcript t, dots.GeneFeature gf,
+ FROM dots.Transcript t, :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf,
sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
WHERE t.parent_id = gf.na_feature_id
AND t.protein_id IS NOT NULL
@@ -28,7 +43,7 @@
SELECT dr.primary_identifier AS id,
gf.source_id AS gene,
'DbRef.primary_identifier' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */
- FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf,
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf,
sres.DbRef dr, sres.ExternalDatabaseRelease edr,
sres.ExternalDatabase ed
WHERE dr.primary_identifier IS NOT NULL
@@ -46,7 +61,7 @@
SELECT dr.primary_identifier AS id,
gf.source_id AS gene,
'DbRef.primary_identifier on Transcript' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */
- FROM dots.GeneFeature gf, dots.Transcript t, dots.DbRefNaFeature drnf,
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.Transcript t, dots.DbRefNaFeature drnf,
sres.DbRef dr, sres.ExternalDatabaseRelease edr,
sres.ExternalDatabase ed
WHERE dr.primary_identifier IS NOT NULL
@@ -64,7 +79,7 @@
SELECT dr.primary_identifier AS id,
gf.source_id AS gene,
'DbRef.primary_identifier on Gene' as union_member, ed.name as database_name
- FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf,
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf,
sres.DbRef dr, sres.ExternalDatabaseRelease edr,
sres.ExternalDatabase ed
WHERE dr.primary_identifier IS NOT NULL
@@ -77,7 +92,7 @@
SELECT dr.primary_identifier AS id,
gf.source_id AS gene,
'VectorBase alternate names' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */
- FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf,
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf,
sres.DbRef dr, sres.ExternalDatabaseRelease edr,
sres.ExternalDatabase ed
WHERE dr.primary_identifier IS NOT NULL
@@ -91,7 +106,7 @@
SELECT dr.primary_identifier AS id,
gf.source_id AS gene,
'synonym' as union_member, ed.name as database_name
- FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf,
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf,
sres.DbRef dr, sres.ExternalDatabaseRelease edr,
sres.ExternalDatabase ed
WHERE dr.primary_identifier IS NOT NULL
@@ -104,7 +119,7 @@
SELECT dr.secondary_identifier AS id,
gf.source_id AS gene,
'DbRef.secondary_identifier' as union_member, ed.name as database_name /* sres.DbRef.secondary_identifier */
- FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf,
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf,
sres.DbRef dr, sres.ExternalDatabaseRelease edr,
sres.ExternalDatabase ed
WHERE dr.secondary_identifier IS NOT NULL
@@ -123,7 +138,7 @@
SELECT dr.primary_identifier AS id,
gf.source_id AS gene,
'genbank DbRef.primary_identifier' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier for Genbank records */
- FROM dots.GeneFeature gf, dots.Transcript t, dots.DbRefNaSequence drns,
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.Transcript t, dots.DbRefNaSequence drns,
sres.DbRef dr, sres.ExternalDatabaseRelease edr,
sres.ExternalDatabase ed
WHERE gf.na_feature_id = t.parent_id
@@ -135,12 +150,15 @@
UNION
SELECT pred_loc.feature_source_id AS id,
gene_loc.feature_source_id AS gene,
- 'overlapping predicted gene source_id' as union_member, ed.name as database_name /* dots.GeneFeature.source_id for predicted genes that overlap */
+ 'overlapping predicted gene source_id' as union_member, ed.name as database_name /* dots.genefeature.source_id for predicted genes that overlap */
FROM apidb.FeatureLocation gene_loc, apidb.FeatureLocation pred_loc,
- sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+ sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed,
+ dots.nasequence nas
WHERE pred_loc.feature_type = 'GenePrediction'
AND gene_loc.feature_type = 'GeneFeature'
AND pred_loc.na_sequence_id = gene_loc.na_sequence_id
+ AND gene_loc.na_sequence_id = nas.na_sequence_id
+ and nas.taxon_id = :TAXON_ID
AND gene_loc.start_min <= pred_loc.end_max
AND gene_loc.end_max >= pred_loc.start_min
AND pred_loc.is_reversed = gene_loc.is_reversed
@@ -149,7 +167,7 @@
UNION
SELECT ng.name AS id, gf.source_id AS gene,
'NaGene' as union_member, ed.name as database_name /* dots.NaGene.name */
- FROM dots.GeneFeature gf, dots.NaFeatureNaGene nfng, dots.NaGene ng,
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.NaFeatureNaGene nfng, dots.NaGene ng,
sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
WHERE gf.na_feature_id = nfng.na_feature_id
AND ng.na_gene_id = nfng.na_gene_id
@@ -158,14 +176,14 @@
UNION
SELECT source_id AS id, source_id AS gene,
'same ID' as union_member, ed.name as database_name /* same ID (reflexive mapping) */
- FROM dots.GeneFeature gf,
+ FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf,
sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
WHERE gf.external_database_release_id = edr.external_database_release_id
AND edr.external_database_id = ed.external_database_id
UNION
SELECT n.name AS id, gf.source_id AS gene,
'gene name' as union_member, d.name as database_name -- apidb.GeneFeatureName.name
- from dots.genefeature gf, sres.ExternalDatabaseRelease r, sres.ExternalDatabase d,
+ from :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, sres.ExternalDatabaseRelease r, sres.ExternalDatabase d,
( select na_feature_id, name
from apidb.GeneFeatureName
where is_preferred = 1
@@ -186,7 +204,7 @@
gf.source_id as gene,
'AA feature DbRef primary ID' as union_member,
ed.name as database_name /* DbRef.primary_identifier mapped through DbRefAaFeature */
- from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf,
+ from :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.Transcript t, dots.TranslatedAaFeature taf,
dots.DbRefAaFeature draf, sres.DbRef dr,
sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
where gf.na_feature_id = t.parent_id
@@ -200,40 +218,39 @@
'SMART', 'SUPERFAMILY', 'TIGRFAM', 'CDD','HAMAP','HMMPANTHER',
'PRINTS','SCANPROSITE','SFLD')
) mapping,
- dots.GeneFeature gf, dots.NaSequence ns
+ :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.NaSequence ns
WHERE mapping.gene = gf.source_id
AND gf.na_sequence_id = ns.na_sequence_id
- AND (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0)
AND (gf.is_predicted != 1 OR gf.is_predicted is null)
GROUP BY mapping.id, mapping.gene
-
-
:DECLARE_PARTITION;
- INSERT INTO :ORG_ABBREVGeneId
- (id, gene, unique_mapping, union_member, database_name)
+ INSERT INTO :SCHEMA.GeneId
+ (id, gene, unique_mapping, union_member, database_name, project_id, org_abbrev, modification_date)
WITH munge
AS (SELECT DISTINCT
regexp_replace(id, '\.\d\d?$', '') as id,
gene, unique_mapping, union_member, database_name
- FROM :ORG_ABBREVGeneId
+ FROM :SCHEMA.GeneId
+ WHERE org_abbrev = ':ORG_ABBREV'
-- CHECK AND FIX
--WHERE regexp_like(id, '(.*)\.\d\d?$')
)
- SELECT id, gene, 0 as unique_mapping, 'base ID' as union_member, database_name
+ SELECT id, gene, 0 as unique_mapping, 'base ID' as union_member, database_name,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM munge
- WHERE id NOT IN (SELECT id FROM :ORG_ABBREVGeneId )
+ WHERE id NOT IN (SELECT id FROM :SCHEMA.GeneId where org_abbrev = ':ORG_ABBREV')
;
- CREATE UNLOGGED TABLE :ORG_ABBREVOneGeneIds (lower_id) AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id) AS
SELECT lower_id
FROM (SELECT DISTINCT lower(id) as lower_id, gene
- FROM :ORG_ABBREVGeneId
+ FROM :SCHEMA.GeneId where org_abbrev = ':ORG_ABBREV'
) t
GROUP BY lower_id
HAVING count(*) = 1
@@ -242,24 +259,28 @@
- CREATE UNIQUE INDEX gix _pk ON :ORG_ABBREVOneGeneIds (lower_id)
-
+ CREATE UNIQUE INDEX :ORG_ABBREV_gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id)
;
- UPDATE :ORG_ABBREVGeneId
+ UPDATE :SCHEMA.GeneId
SET unique_mapping = 1
WHERE id = gene
-
+ and org_abbrev = ':ORG_ABBREV'
;
- UPDATE :ORG_ABBREVGeneId
+ UPDATE :SCHEMA.GeneId
SET unique_mapping = 1
- WHERE lower(id) IN (select lower_id from :ORG_ABBREVOneGeneIds)
-
+ WHERE lower(id) IN (select lower_id from :SCHEMA.:ORG_ABBREVOneGeneIdsTmp)
+ and org_abbrev = ':ORG_ABBREV'
;
+
+drop table :SCHEMA.:ORG_ABBREVOneGeneIdsTmp
+;
+drop table :SCHEMA.:ORG_ABBREVGeneFeatureTmp
+;
diff --git a/Model/lib/psql/webtables/MO/GeneLocations.psql b/Model/lib/psql/webtables/MO/GeneLocations.psql
index aae59b60e7..ad129a3f1b 100644
--- a/Model/lib/psql/webtables/MO/GeneLocations.psql
+++ b/Model/lib/psql/webtables/MO/GeneLocations.psql
@@ -1,9 +1,7 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE :ORG_ABBREVGeneLocations AS
SELECT source_id,
- string_agg(location, '; ' order by is_top_level desc) as locations
+ string_agg(location, '; ' order by is_top_level desc) as locations,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM (SELECT fl.feature_source_id as source_id, fl.is_top_level,
fl.sequence_source_id || ':'
|| trim(to_char(fl.start_min,'999,999,999')) || '..'
@@ -13,7 +11,7 @@
FROM apidb.FeatureLocation fl, dots.NaSequence ns
WHERE fl.feature_type = 'GeneFeature'
AND fl.na_sequence_id = ns.na_sequence_id
- AND (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0)
+ AND ns.taxon_id = :TAXON_ID
) t
GROUP BY source_id
diff --git a/Model/lib/psql/webtables/MO/GeneModelDump.psql b/Model/lib/psql/webtables/MO/GeneModelDump.psql
index 698e4fc44f..2ee1bcdf1e 100644
--- a/Model/lib/psql/webtables/MO/GeneModelDump.psql
+++ b/Model/lib/psql/webtables/MO/GeneModelDump.psql
@@ -1,27 +1,28 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE GeneModelDump as
SELECT source_id, project_id, sequence_id, gm_start,gm_end, type, is_reversed,
- string_agg(transcript_id, ',' ORDER BY transcript_id) AS transcript_ids
+ string_agg(transcript_id, ',' ORDER BY transcript_id) AS transcript_ids,
+ ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM (
SELECT distinct ta.source_id as transcript_id, ta.gene_source_id as source_id, ta.project_id,
ta.sequence_id,gm.start_min as gm_start, gm.end_max as gm_end,
gm.type, gl.is_reversed
FROM
apidb.FeatureLocation gl, dots.NaSequence s,
- TranscriptAttributes ta,
+ :SCHEMA.TranscriptAttributes ta,
(
SELECT CASE el.feature_type WHEN 'ExonFeature' THEN 'Exon' ELSE el.feature_type END as type,
el.parent_id as na_feature_id, el.start_min as start_min, el.end_max as end_max
- FROM apidb.FeatureLocation el
+ FROM apidb.FeatureLocation el, dots.nasequence nas
WHERE el.feature_type in ('ExonFeature','five_prime_UTR', 'three_prime_UTR','CDS','Intron')
AND el.is_top_level = 1
+ AND nas.na_sequence_id = el.na_sequence_id
+ and nas.taxon_id = :TAXON_ID
) gm
WHERE gm.na_feature_id = ta.na_feature_id
AND s.na_sequence_id = gl.na_sequence_id
AND ta.na_feature_id = gl.na_feature_id
AND gl.is_top_level = 1
+ AND ta.org_abbrev = ':ORG_ABBREV'
) t
GROUP BY source_id, project_id, sequence_id,
gm_start, gm_end, type, is_reversed
diff --git a/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql b/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql
index c691f5b8ce..02e0664c2e 100644
--- a/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql
+++ b/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql
@@ -1,13 +1,12 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE GeneSummaryFilter AS
- SELECT CAST(filter_name AS VARCHAR(80)) AS filter_name
+ SELECT CAST(filter_name AS VARCHAR(80)) AS filter_name,':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM (SELECT species as filter_name
- FROM GeneAttributes
+ FROM :SCHEMA.GeneAttributes
+ WHERE org_abbrev = ':ORG_ABBREV'
UNION
SELECT organism as filter_name
- FROM GeneAttributes) t
+ FROM :SCHEMA.GeneAttributes
+ WHERE org_abbrev = ':ORG_ABBREV') t
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/GoTermSummary.psql b/Model/lib/psql/webtables/MO/GoTermSummary.psql
index 7757364b49..a233539f0c 100644
--- a/Model/lib/psql/webtables/MO/GoTermSummary.psql
+++ b/Model/lib/psql/webtables/MO/GoTermSummary.psql
@@ -1,8 +1,6 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE :ORG_ABBREVGoTermSummary AS
- SELECT ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
+ SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
ggt.taxon_id, ggt.is_not, replace(ggt.go_id, '_', ':') as go_id,
ggt.go_term_id, ggt.ontology,
replace(ggt.go_term_name, '_',' ') as go_term_name, ggt.source,
@@ -14,13 +12,14 @@
when gs.ontology_term_id is null then 0
else 1
end as is_go_slim
- FROM :ORG_ABBREVGeneGoTerms ggt
- LEFT JOIN :ORG_ABBREVOntologyLevels ol ON ggt.go_term_id = ol.ontology_term_id
+ FROM :SCHEMA.GeneGoTerms ggt
+ LEFT JOIN :SCHEMA.OntologyLevels ol ON ggt.go_term_id = ol.ontology_term_id
LEFT JOIN (
SELECT distinct ontology_term_id
FROM apidb.GoSubset
WHERE go_subset_term = 'goslim_generic'
) gs ON ggt.go_term_id = gs.ontology_term_id
+ WHERE ggt.org_abbrev = ':ORG_ABBREV'
UNION
SELECT ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
ggt.taxon_id, ggt.is_not,
@@ -36,10 +35,10 @@
when gs.ontology_term_id is null then 0
else 1
end as is_go_slim
- FROM :ORG_ABBREVGeneGoTerms ggt, sres.OntologyRelationship orel,
+ FROM :SCHEMA.GeneGoTerms ggt, sres.OntologyRelationship orel,
sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr,
sres.OntologyTerm ot
- LEFT JOIN :ORG_ABBREVOntologyLevels ol ON ot.ontology_term_id = ol.ontology_term_id
+ LEFT JOIN :SCHEMA.OntologyLevels ol ON ot.ontology_term_id = ol.ontology_term_id
LEFT JOIN (
SELECT distinct ontology_term_id
FROM apidb.GoSubset
@@ -50,6 +49,7 @@
AND edr.external_database_release_id = ot.external_database_release_id
AND edr.external_database_id = ed.external_database_id
AND ed.name ='GO_RSRC'
+ AND ggt.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords.psql b/Model/lib/psql/webtables/MO/IntronUtrCoords.psql
index ac598bbc08..cfed91dac6 100644
--- a/Model/lib/psql/webtables/MO/IntronUtrCoords.psql
+++ b/Model/lib/psql/webtables/MO/IntronUtrCoords.psql
@@ -1,9 +1,7 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE IntronUtrCoords AS
SELECT na_feature_id, source_id,
- '[' || regexp_replace(string_agg(text,',' ORDER BY start_min), '.quot;', '"' ) || ']' AS gen_rel_intron_utr_coords
+ '[' || regexp_replace(string_agg(text,',' ORDER BY start_min), '.quot;', '"' ) || ']' AS gen_rel_intron_utr_coords,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM (
SELECT na_feature_id, source_id, start_min,
'["' || feature_type || '",' || start_min || ',' || end_max || ']' AS text
@@ -20,16 +18,15 @@
ELSE fl.end_max - tl.start_min + 1
END AS end_max
FROM
- apidb.TranscriptLocation tl, apidb.FeatureLocation fl
+ apidb.TranscriptLocation tl, apidb.FeatureLocation fl, dots.nasequence nas
WHERE
tl.na_feature_id = fl.parent_id
+ AND fl.na_sequence_id = nas.na_sequence_id
+ AND nas.taxon_id = :TAXON_ID
AND fl.feature_type in('UTR', 'Intron')
AND tl.is_top_level = 1
AND fl.is_top_level = 1
) t1
) t2
GROUP BY na_feature_id, source_id
-
-
:DECLARE_PARTITION;
-
diff --git a/Model/lib/psql/webtables/MO/TFBSGene.psql b/Model/lib/psql/webtables/MO/TFBSGene.psql
index 3a7103d8ac..882cf31ab9 100644
--- a/Model/lib/psql/webtables/MO/TFBSGene.psql
+++ b/Model/lib/psql/webtables/MO/TFBSGene.psql
@@ -1,8 +1,8 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE TFBSGene AS
SELECT DISTINCT
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
ga.source_id as gene_source_id,
ga.organism as organism,
ga.genus_species as species,
@@ -35,9 +35,10 @@
aef.*
FROM dots.BindingSiteFeature aef,
apidb.FeatureLocation arrloc,
- GeneAttributes ga
+ :SCHEMA.GeneAttributes ga
WHERE aef.na_feature_id = arrloc.na_feature_id
- AND arrloc.na_sequence_id = ga.na_sequence_id
+ AND arrloc.na_sequence_id = ga.na_sequence_id
+ AND ga.org_abbrev = ':ORG_ABBREV'
AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000)
or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) )
diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway.psql b/Model/lib/psql/webtables/MO/TranscriptPathway.psql
index 4b1c95b668..1490621a67 100644
--- a/Model/lib/psql/webtables/MO/TranscriptPathway.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptPathway.psql
@@ -1,42 +1,14 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE TranscriptPathway (
- SOURCE_ID VARCHAR(80),
- GENE_SOURCE_ID VARCHAR(80),
- PROJECT_ID VARCHAR(20),
- PATHWAY_SOURCE_ID VARCHAR(50),
- PATHWAY_NAME VARCHAR(150),
- EC_NUMBER_GENE VARCHAR(16),
- WILDCARD_COUNT_GENE NUMERIC,
- EC_NUMBER_PATHWAY VARCHAR(16),
- WILDCARD_COUNT_PATHWAY NUMERIC,
- EXACT_MATCH NUMERIC,
- COMPLETE_EC NUMERIC,
- PATHWAY_ID NUMERIC(12,0),
- PATHWAY_SOURCE VARCHAR(200),
- EXTERNAL_DATABASE_RELEASE_ID NUMERIC(10,0)
- )
-
-
-:DECLARE_PARTITION;
-
-
-
- DO $$
- DECLARE
- idlist RECORD;
- BEGIN
- FOR idlist IN ( SELECT DISTINCT organism FROM GeneAttributes )
- LOOP
- INSERT INTO TranscriptPathway
WITH transcript_ec AS (
SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
-- CHECK AND FIX
-- regexp_count( ec.ec_number, '-') as wildcard_count
count( ec.ec_number) as wildcard_count
FROM sres.EnzymeClass ec
- WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM dots.AaSequenceEnzymeClass)
+ WHERE enzyme_class_id IN (SELECT aseqEc.enzyme_class_id
+ FROM dots.AaSequenceEnzymeClass aseqEc, dots.aasequence seq
+ WHERE aseqEc.aa_sequence_id = seq.aa_sequence_id
+ AND seq.taxon_id = :TAXON_ID)
GROUP BY ec.enzyme_class_id
),
pathway_node_ec AS (
@@ -86,27 +58,23 @@
, pa.pathway_id
, pa.pathway_source
, p.external_database_release_id
- FROM PathwayAttributes pa
+ FROM :SCHEMA.PathwayAttributes pa
, sres.pathway p
, pathway_node_ec pec
, ec_match
, dots.AaSequenceEnzymeClass asec
- , TranscriptAttributes ga
- WHERE ga.organism = idlist.organism
+ , :SCHEMA.TranscriptAttributes ga
+ WHERE ga.org_abbrev = ':ORG_ABBREV'
AND pa.pathway_id = pec.pathway_id
AND p.pathway_id = pa.pathway_id
AND pec.enzyme_class_id = ec_match.pathway_enzyme_class_id
AND asec.enzyme_class_id = ec_match.transcript_enzyme_class_id
AND ga.aa_sequence_id = asec.aa_sequence_id
- AND (
- (ga.orthomcl_name IS NULL AND asec.evidence_code != 'OrthoMCLDerived')
- OR ga.orthomcl_name IS NOT NULL
- )
- ;
- commit;
- END LOOP;
- END;
- $$ LANGUAGE PLPGSQL;
-
+ -- TODO: Need new downstream table to bring in OrthoMCLDerived EC associations
+ -- AND (
+ -- (ga.orthomcl_name IS NULL AND asec.evidence_code != 'OrthoMCLDerived')
+ -- OR ga.orthomcl_name IS NOT NULL
+ -- )
;
+:DECLARE_PARTITION;
diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt
index 6f0b461f33..dfcfa55e0e 100644
--- a/Model/lib/xml/tuningManager/tablePruning.txt
+++ b/Model/lib/xml/tuningManager/tablePruning.txt
@@ -3,11 +3,11 @@ MG
??
??
??
-??
+K
??
??
-??
-??
+K
+MO
R
??
??
@@ -15,7 +15,7 @@ R
??
??
??
-??
+K
??
??
MO
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index 6373429173..4b7c8e9056 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -36,54 +36,67 @@
- [X] ProteinAttributes.psql
- [ ] TranscriptAttributes_ix.psql
- [ ] TranscriptAttributes.psql
- - [s] CodingSequence_ix.psql
- - [s] CodingSequence.psql
- - [ ] IntronUtrCoords_ix.psql
- - [ ] IntronUtrCoords.psql
- - [s] TranscriptCenDistance_ix.psql
- - [s] TranscriptCenDistance.psql
+ - [X] CodingSequence_ix.psql
+ - [X] CodingSequence.psql
+ - [X] IntronUtrCoords_ix.psql
+ - [X] IntronUtrCoords.psql
+ - [X] TranscriptCenDistance_ix.psql
+ - [X] TranscriptCenDistance.psql
- [ ] TranscriptPathway_ix.psql
- [ ] TranscriptPathway.psql
- - [s] TranscriptSequence_ix.psql
- - [s] TranscriptSequence.psql
- - [s] ChIPchipTranscript_ix.psql
- - [s] ChIPchipTranscript.psql
+ - This may need to move to comparative genomics because we need the OrthoDerived EC mappings
+ - [X] TranscriptSequence_ix.psql
+ - [X] TranscriptSequence.psql
+ - [X] ChIPchipTranscript_ix.psql
+ - [X] ChIPchipTranscript.psql
- Gene
- - [ ] GeneId_ix.psql
- - [ ] GeneId.psql
- - [ ] GeneAttributes_ix.psql
- - [ ] GeneAttributes.psql
- - [ ] GeneCopyNumbers_ix.psql
- - [ ] GeneCopyNumbers.psql
- - [ ] GeneGoTable_ix.psql
- - [ ] GeneGoTable.psql
- - [ ] GeneGoTerms_ix.psql
- - [ ] GeneGoTerms.psql
- - [ ] GeneLocations_ix.psql
- - [ ] GeneLocations.psql
- - [ ] GeneModelDump_ix.psql
- - [ ] GeneModelDump.psql
- - [ ] GeneSummaryFilter_ix.psql
- - [ ] GeneSummaryFilter.psql
- - [ ] TFBSGene_ix.psql
- - [ ] TFBSGene.psql
+ - [X] GeneId_ix.psql
+ - [X] GeneId.psql
+ - [X] GeneAttributes_ix.psql
+ - [X] GeneAttributes.psql
+ - [X] GeneCopyNumbers_ix.psql
+ - [X] GeneCopyNumbers.psql
+ - [X] GeneGoTable_ix.psql
+ - [X] GeneGoTable.psql
+ - [X] GeneGoTerms_ix.psql
+ - [X] GeneGoTerms.psql
+ - [X] GeneLocations_ix.psql
+ - [X] GeneLocations.psql
+ - [X] GeneModelDump_ix.psql
+ - [X] GeneModelDump.psql
+ - [X] GeneSummaryFilter_ix.psql
+ - [X] GeneSummaryFilter.psql
+ - [X] TFBSGene_ix.psql
+ - [X] TFBSGene.psql
- [ ] PathwayNodeGene_ix.psql
+ - This may need to move to comparative genomics because we need the OrthoDerived EC mappings
- [ ] PathwayNodeGene.psql
+ - This may need to move to comparative genomics because we need the OrthoDerived EC mappings
- [ ] PathwaysGeneTable_ix.psql
+ - This may need to move to comparative genomics because we need the OrthoDerived EC mappings
- [ ] PathwaysGeneTable.psql
- - [ ] GoTermSummary_ix.psql
- - [ ] GoTermSummary.psql
- - [ ] EqtlSpan_ix.psql
- - [ ] EqtlSpan.psql
+ - This may need to move to comparative genomics because we need the OrthoDerived EC mappings
+ - [X] GoTermSummary_ix.psql
+ - [X] GoTermSummary.psql
+ - [X] EqtlSpan_ix.psql
+ - [X] EqtlSpan.psql
- EST
- - [ ] EstAlignmentGeneSummary_ix.psql
- - [ ] EstAlignmentGeneSummary.psql
- [ ] EstAttributes_ix.psql
+ - move to comparative
+ - join to apidb.organism and filter by "is_reference_strain"
- [ ] EstAttributes.psql
+ - move to comparative
+ - join to apidb.organism and filter by "is_reference_strain"
- [ ] EstSequence_ix.psql
+ - move to comparative
+ - join to apidb.organism and filter by "is_reference_strain"
- [ ] EstSequence.psql
+ - move to comparative
+ - join to apidb.organism and filter by "is_reference_strain"
+ - [X] EstAlignmentGeneSummary_ix.psql
+ - [X] EstAlignmentGeneSummary.psql
- Dataset / Other
- [ ] DatasetExampleSourceId_ix.psql
@@ -111,4 +124,6 @@
- [ ] NameMappingGIJ_ix.psql
- [ ] NameMappingGIJ.psql
- [ ] GeneMaxIntronGIJ_ix.psql
+ - should be aux table and dropped
- [ ] GeneMaxIntronGIJ.psql
+ - should be aux table and dropped
From 706a5611763e66f9ea1c42a9745d02fb17c9ef55 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 13 May 2025 16:04:56 -0400
Subject: [PATCH 011/112] convert some more MOs
---
.../lib/psql/webtables/MO/ChrCopyNumbers.psql | 7 +-
.../webtables/MO/DatasetExampleSourceId.psql | 11 +-
.../psql/webtables/MO/OrganismAttributes.psql | 100 +++++++++---------
Model/lib/xml/tuningManager/webtables.org | 8 +-
4 files changed, 62 insertions(+), 64 deletions(-)
diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
index d22fda9dd9..8b4b6d5ca1 100644
--- a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
+++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
@@ -1,15 +1,12 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE ChrCopyNumbers AS
SELECT DISTINCT ta.na_sequence_id
, ta.chromosome
, ccn.chr_copy_number AS ploidy
, io.input_pan_id
, io.output_pan_id
FROM apidb.ChrCopyNumber ccn
- , TranscriptAttributes ta
- , PANIo io
+ , :SCHEMA.TranscriptAttributes ta
+ , :SCHEMA.PANIo io
WHERE ta.na_sequence_id = ccn.na_sequence_id
AND ta.chromosome IS NOT NULL
AND ccn.protocol_app_node_id = io.output_pan_id
diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql
index 9d7b830c6d..3d10c6319c 100644
--- a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql
+++ b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql
@@ -1,20 +1,19 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE DatasetExampleSourceId AS
WITH profiles AS (
SELECT p.source_id,
- ga.project_id,
ga.sequence_id,
d.name,
row_number() over(partition by d.name
order by ga.chromosome_order_num, p.profile_as_string desc) as rn
FROM Profile p
INNER JOIN sres.ExternalDatabase d ON p.dataset_name = d.name
- LEFT JOIN GeneAttributes ga ON p.source_id = ga.source_id
+ LEFT JOIN :SCHEMA.GeneAttributes ga ON p.source_id = ga.source_id
WHERE p.profile_as_string is not null
+ and ga.org_abbrev = ':ORG_ABBREV'
)
- SELECT p.source_id as example_source_id, p.project_id, p.sequence_id, p.name as dataset
+ SELECT
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ p.source_id as example_source_id, p.sequence_id, p.name as dataset
FROM profiles p
WHERE p.rn = 1
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
index 1a38fed293..9e49fb17a9 100644
--- a/Model/lib/psql/webtables/MO/OrganismAttributes.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
@@ -1,5 +1,3 @@
-
-
CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVDataSourceCount AS
SELECT
taxon_id,
@@ -13,34 +11,37 @@
FROM dots.externalNAsequence enas, SRES.ontologyterm ot
WHERE enas.sequence_ontology_id = ot.ontology_term_id
AND ot.name in( 'mitochondrial_chromosome','apicoplast_chromosome')
- GROUP BY enas.taxon_id
+ and enas.taxon_id = :TAXON_ID
+ GROUP BY enas.taxon_id
UNION
SELECT distinct ds.taxon_id, 'HTSIsolate' AS stype, 1 AS num
FROM apidb.DataSource ds
WHERE ds.type = 'isolates' AND ds.subtype = 'HTS_SNP'
- GROUP BY taxon_id
+ AND ds.taxon_id = :TAXON_ID
+ GROUP BY taxon_id
UNION
SELECT distinct ds.taxon_id, 'Popset' AS stype, 1 AS num
FROM apidb.DataSource ds
WHERE ds.subtype = 'sequenceing_types'
+ AND ds.taxon_id = :TAXON_ID
GROUP BY taxon_id
UNION
SELECT distinct ds.taxon_id, 'Epitope' AS stype, 1 AS num
FROM apidb.DataSource ds
WHERE ds.type = 'epitope'
+ AND ds.taxon_id = :TAXON_ID
GROUP BY taxon_id
UNION
SELECT distinct ds.taxon_id, 'Array' AS stype, 1 AS num
FROM apidb.DataSource ds
WHERE ds.type = 'transcript_expression'
AND ds.subtype = 'array'
+ AND ds.taxon_id = :TAXON_ID
GROUP BY taxon_id
) t
GROUP BY taxon_id
;
-
-
CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVOrganismCentromere AS
SELECT distinct s.taxon_id,
@@ -51,11 +52,10 @@
WHERE ot.ontology_term_id = f.sequence_ontology_id
AND ot.name='centromere'
AND f.na_sequence_id = s.na_sequence_id
- GROUP BY s.taxon_id
+ AND s.taxon_id = :TAXON_ID
+ GROUP BY s.taxon_id
;
-
-
CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVSequenceCount AS
SELECT
@@ -65,21 +65,21 @@
max(CASE WHEN sequence_type = 'chromosome' THEN num ELSE null END) as chrom_num
FROM (
SELECT count(*) as num, sequence_type, taxon_id
- FROM GenomicSeqAttributes
+ FROM :SCHEMA.GenomicSeqAttributes
WHERE is_top_level =1
- GROUP BY taxon_id, sequence_type
+ and org_abbrev = ':ORG_ABBREV'
+ GROUP BY taxon_id, sequence_type
) t
GROUP BY taxon_id
;
-
-
CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVCommunityCount AS
-- SELECT taxon_id, count(*) as communityCount
-- TODO: addd this back
select taxon_id, 0 as communityCount
- FROM GeneAttributes
+ FROM :SCHEMA.GeneAttributes
+ where org_abbrev = ':ORG_ABBREV'
--WHERE
--(source_id, project_id) IN (
--SELECT distinct stable_id, project_name
@@ -115,7 +115,7 @@
else ''
end)) as geneArrayCount
FROM Profile p
- RIGHT OUTER JOIN GeneAttributes ga ON ga.source_id = p.source_id
+ RIGHT OUTER JOIN :SCHEMA.GeneAttributes ga ON ga.source_id = p.source_id and ga.org_abbrev = ':ORG_ABBREV'
GROUP BY ga.taxon_id
;
@@ -134,6 +134,7 @@
AND t.table_id = s.query_table_id
AND t.name = 'ExternalNASequence'
AND s.pvalue_exp <= -10
+ and nas.taxon_id = :TAXON_ID
) sim LEFT JOIN
(SELECT i.source_id, seq.source_id as sequence_id
FROM dots.similarity s, PopsetAttributes i, GeneAttributes g,
@@ -146,6 +147,7 @@
AND s.max_subject_end >= g.start_min
AND g.na_sequence_id = seq.na_sequence_id
AND t.name = 'ExternalNASequence'
+ and seq.taxon_id = :TAXON_ID
) gene
ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id)
GROUP BY sim.taxon_id
@@ -160,7 +162,7 @@
genomestat.database_version,
genomestat.ncbi_tax_id,
genomestat.Megabps,
- coalesce(snpCount.ct,0) as snpCount,
+ -- coalesce(snpCount.ct,0) as snpCount,
coalesce(count(distinct ga.source_id),0) as geneCount,
coalesce(count(distinct case when ga.is_pseudo =1 then ga.source_id else '' end),0) as pseudoGeneCount,
coalesce(count(distinct case when (ga.gene_type ='protein coding' or ga.gene_type ='protein coding gene') then ga.source_id else '' end),0) as codingGeneCount,
@@ -185,32 +187,32 @@
then mss.aa_sequence_id
else NULL
end)),0) proteomicsCount,
- coalesce(count (distinct (case when ga.is_deprecated = 0
- then est.source_id
- else NULL
- end)),0) estCount,
+ -- coalesce(count (distinct (case when ga.is_deprecated = 0
+ -- then est.source_id
+ -- else NULL
+ -- end)),0) estCount,
coalesce(count (distinct (case when (ga.is_deprecated = 0 and ta.ec_numbers is not null)
then ga.source_id
else NULL
end)),0) ecNumberCount
FROM GeneAttributes ga
LEFT OUTER JOIN apidb.phylogeneticprofile pp on ga.source_id = pp.source_id
- LEFT OUTER JOIN gotermsummary gts on ga.source_id = gts.gene_source_id
- LEFT OUTER JOIN TFBSGene tfbs on ga.source_id = tfbs.gene_source_id
- LEFT OUTER JOIN TranscriptAttributes ta on ta.gene_source_id = ga.source_id
+ LEFT OUTER JOIN :SCHEMA.gotermsummary gts on ga.source_id = gts.gene_source_id and gts.org_abbrev = ':ORG_ABBREV'
+ LEFT OUTER JOIN :SCHEMA.TFBSGene tfbs on ga.source_id = tfbs.gene_source_id and tfbs.org_abbrev = ':ORG_ABBREV'
+ LEFT OUTER JOIN :SCHEMA.TranscriptAttributes ta on ta.gene_source_id = ga.source_id and ta.org_abbrev = ':ORG_ABBREV'
LEFT OUTER JOIN apidb.MassSpecSummary mss on ta.aa_sequence_id = mss.aa_sequence_id
- LEFT OUTER JOIN chipchipTranscript cct on ga.source_id = cct.gene_source_id
- LEFT OUTER JOIN (
- SELECT distinct s.gene as source_id
- FROM EstAlignmentGeneSummary s, EstAttributes e
- WHERE s.est_gene_overlap_length >= 100
- AND s.is_best_alignment in (1)
- AND s.percent_est_bases_aligned >= 20
- AND s.percent_identity >= 90
- AND e.best_alignment_count <= 1
- AND e.source_id = s.accession
- GROUP by s.gene HAVING count(*) >= 1
- ) est ON ga.source_id = est.source_id
+ LEFT OUTER JOIN :SCHEMA.chipchipTranscript cct on ga.source_id = cct.gene_source_id and cct.org_abbrev = ':ORG_ABBREV'
+ -- LEFT OUTER JOIN (
+ -- SELECT distinct s.gene as source_id
+ -- FROM EstAlignmentGeneSummary s, EstAttributes e
+ -- WHERE s.est_gene_overlap_length >= 100
+ -- AND s.is_best_alignment in (1)
+ -- AND s.percent_est_bases_aligned >= 20
+ -- AND s.percent_identity >= 90
+ -- AND e.best_alignment_count <= 1
+ -- AND e.source_id = s.accession
+ -- GROUP by s.gene HAVING count(*) >= 1
+ -- ) est ON ga.source_id = est.source_id
RIGHT OUTER JOIN (
SELECT project_id, taxon_id,
max(database_version) as database_version,
@@ -218,30 +220,28 @@
ELSE ncbi_tax_id
END ncbi_tax_id,
to_char(sum(length)/1000000,'9999.99') as megabps
- FROM GenomicSeqAttributes
+ FROM :SCHEMA.GenomicSeqAttributes
WHERE is_top_level = 1
+ AND org_abbrev = ':ORG_ABBREV'
GROUP BY project_ID, taxon_id, ncbi_tax_id
) genomestat ON genomestat.taxon_id = ga.taxon_id
- LEFT OUTER JOIN (
- SELECT count(distinct ga.source_id) as ct, ga.taxon_id
- FROM GeneAttributes ga, SnpAttributes sf
- WHERE sf.gene_source_id = ga.source_id
- AND ga.is_deprecated = 0
- GROUP BY ga.taxon_id
- ) snpCount ON ga.taxon_id = snpCount.taxon_id
+ -- LEFT OUTER JOIN (
+ -- SELECT count(distinct ga.source_id) as ct, ga.taxon_id
+ -- FROM GeneAttributes ga, SnpAttributes sf
+ -- WHERE sf.gene_source_id = ga.source_id
+ -- AND ga.is_deprecated = 0
+ -- GROUP BY ga.taxon_id
+ -- ) snpCount ON ga.taxon_id = snpCount.taxon_id
GROUP BY genomestat.taxon_id,
genomestat.project_id,
genomestat.database_version,
genomestat.ncbi_tax_id,
- genomestat.Megabps,
- snpCount.ct
+ genomestat.Megabps
+ -- snpCount.ct
;
:CREATE_AND_POPULATE
-
-
- CREATE TABLE OrganismAttributes AS
SELECT oa.*, tn2.name as species, t.ncbi_tax_id as species_ncbi_tax_id
, CASE WHEN ltrim(replace(oa.organism_name, tn2.name, ''))= oa.organism_name
THEN strain_abbrev
@@ -310,7 +310,8 @@
LEFT JOIN (
SELECT taxon_id, round(avg(length),1) as avg_transcript_length
FROM TranscriptAttributes
- GROUP by taxon_id
+ where org_abbrev = ':ORG_ABBREV'
+ GROUP by taxon_id
) ta ON o.taxon_id = ta.taxon_id
WHERE tn.name_class = 'scientific name'
) oa,
@@ -321,6 +322,7 @@
AND ts.species_taxon_id = t.taxon_id
AND ts.species_taxon_id = tn2.taxon_id
AND tn2.name_class = 'scientific name'
+ and o.taxon_id = :TAXON_ID
:DECLARE_PARTITION;
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index 4b7c8e9056..cd642166d8 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -99,8 +99,8 @@
- [X] EstAlignmentGeneSummary.psql
- Dataset / Other
- - [ ] DatasetExampleSourceId_ix.psql
- - [ ] DatasetExampleSourceId.psql
+ - [s] DatasetExampleSourceId_ix.psql
+ - [s] DatasetExampleSourceId.psql
- [ ] Profile_ix.psql
- [ ] Profile.psql
- [ ] ProfileSamples_ix.psql
@@ -109,8 +109,8 @@
- [ ] ProfileType.psql
- [ ] RnaSeqStats_ix.psql
- [ ] RnaSeqStats.psql
- - [ ] OrganismAttributes_ix.psql
- - [ ] OrganismAttributes.psql
+ - [s] OrganismAttributes_ix.psql
+ - [s] OrganismAttributes.psql
- [ ] ChrCopyNumbers_ix.psql
- [ ] ChrCopyNumbers.psql
From 8b2139611dc942acfb10ee0b00ada7d2e0e0a872 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 13 May 2025 16:09:07 -0400
Subject: [PATCH 012/112] convert some more MOs
---
Model/lib/psql/webtables/MO/ChrCopyNumbers.psql | 4 +++-
Model/lib/xml/tuningManager/webtables.org | 4 ++--
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
index 8b4b6d5ca1..5b81bd75e7 100644
--- a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
+++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
@@ -6,10 +6,12 @@
, io.output_pan_id
FROM apidb.ChrCopyNumber ccn
, :SCHEMA.TranscriptAttributes ta
- , :SCHEMA.PANIo io
+ , :SCHEMA.PANIo io
WHERE ta.na_sequence_id = ccn.na_sequence_id
AND ta.chromosome IS NOT NULL
AND ccn.protocol_app_node_id = io.output_pan_id
+ and ta.org_abbrev = ':ORG_ABBREV'
+ and io.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index cd642166d8..e8923cdb98 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -111,8 +111,8 @@
- [ ] RnaSeqStats.psql
- [s] OrganismAttributes_ix.psql
- [s] OrganismAttributes.psql
- - [ ] ChrCopyNumbers_ix.psql
- - [ ] ChrCopyNumbers.psql
+ - [s] ChrCopyNumbers_ix.psql
+ - [s] ChrCopyNumbers.psql
- Junctions (Kathryn)
- [ ] IntronSupportLevel_ix.psql
From f7c173d3fbe96ab400150eff5a4a250151aac6fc Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Tue, 13 May 2025 16:11:05 -0400
Subject: [PATCH 013/112] filled in some ?
---
Model/lib/xml/tuningManager/tablePruning.txt | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt
index dfcfa55e0e..3062f04c2c 100644
--- a/Model/lib/xml/tuningManager/tablePruning.txt
+++ b/Model/lib/xml/tuningManager/tablePruning.txt
@@ -1,23 +1,23 @@
MG
-??
+MO
??
??
-??
+K
K
-??
+R
??
K
MO
R
-??
-??
+MO
+MO
??
-??
-??
-??
+MO
+K
+MO (Pretty sure this is MO... but we do have profiles for compounds (not genes). they should be handled different)
K
-??
-??
+K
+MC
MO
MO
MO (could be renamed to LineageForSynteny)
From 2f7e259dba5837150dc23b48b9e0eb0086e6cfae Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 13 May 2025 16:14:05 -0400
Subject: [PATCH 014/112] rename ?? to UK
---
Model/lib/psql/webtables/{?? => UK}/AlphaFoldGenes.psql | 0
Model/lib/psql/webtables/{?? => UK}/AssociatedDataset.psql | 0
Model/lib/psql/webtables/{?? => UK}/DatasetDetail.psql | 0
Model/lib/psql/webtables/{?? => UK}/DatasetPresenter.psql | 0
Model/lib/psql/webtables/{?? => UK}/DomainAssignment.psql | 0
Model/lib/psql/webtables/{?? => UK}/EdaGeneGraph.psql | 0
Model/lib/psql/webtables/{?? => UK}/EupathBuildDates.psql | 0
.../lib/psql/webtables/{?? => UK}/ExternalDbDatasetPresenter.psql | 0
.../lib/psql/webtables/{?? => UK}/ExternalSequenceTaxonRank.psql | 0
Model/lib/psql/webtables/{?? => UK}/GeneGroupProfile.psql | 0
Model/lib/psql/webtables/{?? => UK}/GroupPhylogeneticProfile.psql | 0
Model/lib/psql/webtables/{?? => UK}/OrthologousTranscripts.psql | 0
Model/lib/psql/webtables/{?? => UK}/PANExtDbRls.psql | 0
Model/lib/psql/webtables/{?? => UK}/PANIO.psql | 0
Model/lib/psql/webtables/{?? => UK}/PANResults.psql | 0
Model/lib/psql/webtables/{?? => UK}/PhyleticPattern.psql | 0
Model/lib/psql/webtables/{?? => UK}/ProjectTaxon.psql | 0
Model/lib/psql/webtables/{?? => UK}/SequenceAttributes.psql | 0
Model/lib/psql/webtables/{?? => UK}/SequenceEnzymeClass.psql | 0
Model/lib/psql/webtables/{?? => UK}/StudyIdDatasetId.psql | 0
Model/lib/psql/webtables/{?? => UK}/TypeAheadCounts.psql | 0
21 files changed, 0 insertions(+), 0 deletions(-)
rename Model/lib/psql/webtables/{?? => UK}/AlphaFoldGenes.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/AssociatedDataset.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/DatasetDetail.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/DatasetPresenter.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/DomainAssignment.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/EdaGeneGraph.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/EupathBuildDates.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/ExternalDbDatasetPresenter.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/ExternalSequenceTaxonRank.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/GeneGroupProfile.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/GroupPhylogeneticProfile.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/OrthologousTranscripts.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/PANExtDbRls.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/PANIO.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/PANResults.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/PhyleticPattern.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/ProjectTaxon.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/SequenceAttributes.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/SequenceEnzymeClass.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/StudyIdDatasetId.psql (100%)
rename Model/lib/psql/webtables/{?? => UK}/TypeAheadCounts.psql (100%)
diff --git a/Model/lib/psql/webtables/??/AlphaFoldGenes.psql b/Model/lib/psql/webtables/UK/AlphaFoldGenes.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/AlphaFoldGenes.psql
rename to Model/lib/psql/webtables/UK/AlphaFoldGenes.psql
diff --git a/Model/lib/psql/webtables/??/AssociatedDataset.psql b/Model/lib/psql/webtables/UK/AssociatedDataset.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/AssociatedDataset.psql
rename to Model/lib/psql/webtables/UK/AssociatedDataset.psql
diff --git a/Model/lib/psql/webtables/??/DatasetDetail.psql b/Model/lib/psql/webtables/UK/DatasetDetail.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/DatasetDetail.psql
rename to Model/lib/psql/webtables/UK/DatasetDetail.psql
diff --git a/Model/lib/psql/webtables/??/DatasetPresenter.psql b/Model/lib/psql/webtables/UK/DatasetPresenter.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/DatasetPresenter.psql
rename to Model/lib/psql/webtables/UK/DatasetPresenter.psql
diff --git a/Model/lib/psql/webtables/??/DomainAssignment.psql b/Model/lib/psql/webtables/UK/DomainAssignment.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/DomainAssignment.psql
rename to Model/lib/psql/webtables/UK/DomainAssignment.psql
diff --git a/Model/lib/psql/webtables/??/EdaGeneGraph.psql b/Model/lib/psql/webtables/UK/EdaGeneGraph.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/EdaGeneGraph.psql
rename to Model/lib/psql/webtables/UK/EdaGeneGraph.psql
diff --git a/Model/lib/psql/webtables/??/EupathBuildDates.psql b/Model/lib/psql/webtables/UK/EupathBuildDates.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/EupathBuildDates.psql
rename to Model/lib/psql/webtables/UK/EupathBuildDates.psql
diff --git a/Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql b/Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql
rename to Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql
diff --git a/Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql b/Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql
rename to Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql
diff --git a/Model/lib/psql/webtables/??/GeneGroupProfile.psql b/Model/lib/psql/webtables/UK/GeneGroupProfile.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/GeneGroupProfile.psql
rename to Model/lib/psql/webtables/UK/GeneGroupProfile.psql
diff --git a/Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql b/Model/lib/psql/webtables/UK/GroupPhylogeneticProfile.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql
rename to Model/lib/psql/webtables/UK/GroupPhylogeneticProfile.psql
diff --git a/Model/lib/psql/webtables/??/OrthologousTranscripts.psql b/Model/lib/psql/webtables/UK/OrthologousTranscripts.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/OrthologousTranscripts.psql
rename to Model/lib/psql/webtables/UK/OrthologousTranscripts.psql
diff --git a/Model/lib/psql/webtables/??/PANExtDbRls.psql b/Model/lib/psql/webtables/UK/PANExtDbRls.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/PANExtDbRls.psql
rename to Model/lib/psql/webtables/UK/PANExtDbRls.psql
diff --git a/Model/lib/psql/webtables/??/PANIO.psql b/Model/lib/psql/webtables/UK/PANIO.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/PANIO.psql
rename to Model/lib/psql/webtables/UK/PANIO.psql
diff --git a/Model/lib/psql/webtables/??/PANResults.psql b/Model/lib/psql/webtables/UK/PANResults.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/PANResults.psql
rename to Model/lib/psql/webtables/UK/PANResults.psql
diff --git a/Model/lib/psql/webtables/??/PhyleticPattern.psql b/Model/lib/psql/webtables/UK/PhyleticPattern.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/PhyleticPattern.psql
rename to Model/lib/psql/webtables/UK/PhyleticPattern.psql
diff --git a/Model/lib/psql/webtables/??/ProjectTaxon.psql b/Model/lib/psql/webtables/UK/ProjectTaxon.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/ProjectTaxon.psql
rename to Model/lib/psql/webtables/UK/ProjectTaxon.psql
diff --git a/Model/lib/psql/webtables/??/SequenceAttributes.psql b/Model/lib/psql/webtables/UK/SequenceAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/SequenceAttributes.psql
rename to Model/lib/psql/webtables/UK/SequenceAttributes.psql
diff --git a/Model/lib/psql/webtables/??/SequenceEnzymeClass.psql b/Model/lib/psql/webtables/UK/SequenceEnzymeClass.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/SequenceEnzymeClass.psql
rename to Model/lib/psql/webtables/UK/SequenceEnzymeClass.psql
diff --git a/Model/lib/psql/webtables/??/StudyIdDatasetId.psql b/Model/lib/psql/webtables/UK/StudyIdDatasetId.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/StudyIdDatasetId.psql
rename to Model/lib/psql/webtables/UK/StudyIdDatasetId.psql
diff --git a/Model/lib/psql/webtables/??/TypeAheadCounts.psql b/Model/lib/psql/webtables/UK/TypeAheadCounts.psql
similarity index 100%
rename from Model/lib/psql/webtables/??/TypeAheadCounts.psql
rename to Model/lib/psql/webtables/UK/TypeAheadCounts.psql
From ff0a3fce9aef42e053466a1575958767033da475 Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Tue, 13 May 2025 16:17:36 -0400
Subject: [PATCH 015/112] filled in some ?
---
Model/lib/xml/tuningManager/tablePruning.txt | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt
index 3062f04c2c..aa94aa6da9 100644
--- a/Model/lib/xml/tuningManager/tablePruning.txt
+++ b/Model/lib/xml/tuningManager/tablePruning.txt
@@ -116,8 +116,8 @@ MO
R
MO
R
-?? (could be put into the orthomcl graph. Rich and John should look)
-?? (similar to OrthologousTranscripts)
+MC (could be put into the orthomcl graph. Rich and John should look)
+MC (similar to OrthologousTranscripts)
R
R
MO (PANIO will need to exist)
@@ -128,9 +128,9 @@ K
MO
K
MO (rm auto_lob; don't need to loop over chunks in postgres)
-??
+MC
K
K
-?? (can we do project specific alphafold? or, put this in after alphafold, whereever it goes)
+MC (alpha fold is cross project)
MO
K
From d666a39ba238dddb7a9e62ea243a1537ad800e0d Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 13 May 2025 16:59:01 -0400
Subject: [PATCH 016/112] more updates
---
.../lib/psql/webtables/MO/ChrCopyNumbers.psql | 6 +++-
.../psql/webtables/MO/OrganismAttributes.psql | 3 +-
.../{UK => MO}/SequenceEnzymeClass.psql | 29 ++++++-------------
.../webtables/MO/SequenceEnzymeClass_ix.psql | 7 +++++
Model/lib/xml/tuningManager/webtables.org | 6 +++-
5 files changed, 28 insertions(+), 23 deletions(-)
rename Model/lib/psql/webtables/{UK => MO}/SequenceEnzymeClass.psql (58%)
create mode 100644 Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql
diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
index 5b81bd75e7..eafe7652b8 100644
--- a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
+++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
@@ -1,5 +1,9 @@
:CREATE_AND_POPULATE
- SELECT DISTINCT ta.na_sequence_id
+ SELECT DISTINCT
+ ta.project_id
+ , ta.org_abbrev
+ , current_timestamp as modification_date -- one value for the whole statement, so it adds no extra rows under DISTINCT
+ , ta.na_sequence_id
, ta.chromosome
, ccn.chr_copy_number AS ploidy
, io.input_pan_id
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
index 9e49fb17a9..1f68de2f75 100644
--- a/Model/lib/psql/webtables/MO/OrganismAttributes.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
@@ -242,7 +242,8 @@
;
:CREATE_AND_POPULATE
- SELECT oa.*, tn2.name as species, t.ncbi_tax_id as species_ncbi_tax_id
+ SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ oa.*, tn2.name as species, t.ncbi_tax_id as species_ncbi_tax_id -- NOTE(review): oa.* expands after three literal columns - confirm oa exposes no project_id, org_abbrev or modification_date of its own, a duplicate name would be rejected if this select feeds CREATE TABLE AS
, CASE WHEN ltrim(replace(oa.organism_name, tn2.name, ''))= oa.organism_name
THEN strain_abbrev
ELSE ltrim(replace(oa.organism_name, tn2.name, '')) END AS strain
diff --git a/Model/lib/psql/webtables/UK/SequenceEnzymeClass.psql b/Model/lib/psql/webtables/MO/SequenceEnzymeClass.psql
similarity index 58%
rename from Model/lib/psql/webtables/UK/SequenceEnzymeClass.psql
rename to Model/lib/psql/webtables/MO/SequenceEnzymeClass.psql
index 35fdff6793..f948ba46c7 100644
--- a/Model/lib/psql/webtables/UK/SequenceEnzymeClass.psql
+++ b/Model/lib/psql/webtables/MO/SequenceEnzymeClass.psql
@@ -1,7 +1,8 @@
-
-
- CREATE TABLE SequenceEnzymeClass AS (
- SELECT sa.full_id
+:CREATE_AND_POPULATE
+ SELECT sa.project_id
+ , sa.org_abbrev
+ , current_timestamp as modification_date
+ , sa.full_id
, sa.group_name
-- , sec.uniprot_accession
, ec.ec_number
@@ -11,26 +12,14 @@
, ec.ec_number_2
, ec.ec_number_3
, ec.ec_number_4
- FROM sequenceattributes sa
+ FROM :SCHEMA.sequenceattributes sa
, dots.AASequence aa
, dots.AASequenceEnzymeClass sec
, sres.ENZYMECLASS ec
WHERE sa.aa_sequence_id = aa.aa_sequence_id
AND sec.aa_sequence_id = aa.aa_sequence_id
AND sec.enzyme_class_id = ec.enzyme_class_id
- )
-
- ;
-
-
-
- CREATE INDEX SequenceEnzymeClass_idx1 ON SequenceEnzymeClass (group_name, ec_number, description)
-
- ;
-
-
-
- CREATE INDEX SequenceEnzymeClass_idx2 ON SequenceEnzymeClass (full_id, ec_number, description)
-
- ;
+ AND sa.org_abbrev = ':ORG_ABBREV' -- restrict the build to the sequenceattributes rows of one organism
+:DECLARE_PARTITION
+;
diff --git a/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql b/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql
new file mode 100644
index 0000000000..9a55de605a
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql
@@ -0,0 +1,7 @@
+ -- Index names are left unqualified because PostgreSQL rejects a schema-qualified index name and always creates the index in the schema of its table.
+ CREATE INDEX SequenceEnzymeClass_idx1 ON :SCHEMA.SequenceEnzymeClass (group_name, ec_number, description)
+ ;
+ -- Same trailing columns as idx1, led by full_id instead of group_name.
+ CREATE INDEX SequenceEnzymeClass_idx2 ON :SCHEMA.SequenceEnzymeClass (full_id, ec_number, description)
+ ;
+
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index e8923cdb98..2ffa4f7f3f 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -22,7 +22,8 @@
- [X] GenomicSequenceSequence.psql
- [X] SequencePieceClosure
- [X] GenomicSeqAttributes
-
+ - [s] SequenceEnzymeClass
+
- Transcript / Protein
- [X] SignalPeptideDomains_ix.psql
- [X] SignalPeptideDomains.psql
@@ -101,6 +102,8 @@
- Dataset / Other
- [s] DatasetExampleSourceId_ix.psql
- [s] DatasetExampleSourceId.psql
+ - [ ] PANIO.psql
+ - [ ] PANIO_ix.psql
- [ ] Profile_ix.psql
- [ ] Profile.psql
- [ ] ProfileSamples_ix.psql
@@ -111,6 +114,7 @@
- [ ] RnaSeqStats.psql
- [s] OrganismAttributes_ix.psql
- [s] OrganismAttributes.psql
+ - removed ESTs and SNPs
- [s] ChrCopyNumbers_ix.psql
- [s] ChrCopyNumbers.psql
From 536410be5772bfa030d7412808c19ccc385561d2 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Wed, 14 May 2025 11:43:14 -0400
Subject: [PATCH 017/112] touch up transattrs
---
Model/lib/psql/webtables/MO/TranscriptAttributes.psql | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
index 069c605c63..e82dea4d5b 100644
--- a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
@@ -431,25 +431,24 @@
;
- -- TODO: THIS IS BROKEN.
UPDATE :SCHEMA.TranscriptAttributes
SET representative_transcript = (
select min(source_id)
from :SCHEMA.TranscriptAttributes ga
where ga.gene_source_id = :SCHEMA.TranscriptAttributes.gene_source_id
+ and org_abbrev = ':ORG_ABBREV' -- unqualified, so it binds to ga, the table of this subquery
)
WHERE representative_transcript is null
AND gene_id is not null
-
+ and org_abbrev = ':ORG_ABBREV' -- limits the rows being updated to this organism
;
- -- TODO: THIS IS BROKEN.
- UPDATE :ORG_ABBREVTranscriptAttributes
+ UPDATE :SCHEMA.TranscriptAttributes -- the shared schema-qualified table, as in the UPDATE above, not a per-organism table name
SET representative_transcript = source_id
WHERE representative_transcript is null
-
+ and org_abbrev = ':ORG_ABBREV' -- any transcript of this organism that is still unset becomes its own representative
;
- -- TODO: THIS IS BROKEN.
+
drop table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp
;
From d13012bd9e1ed9194618249674bbc3a55cda2675 Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Wed, 14 May 2025 16:36:51 -0400
Subject: [PATCH 018/112] wip
---
Model/lib/psql/webtables/UK/PANExtDbRls.psql | 18 ++++--
Model/lib/psql/webtables/UK/PANIO.psql | 59 ++++----------------
2 files changed, 24 insertions(+), 53 deletions(-)
diff --git a/Model/lib/psql/webtables/UK/PANExtDbRls.psql b/Model/lib/psql/webtables/UK/PANExtDbRls.psql
index 82b469f127..524ef017c5 100644
--- a/Model/lib/psql/webtables/UK/PANExtDbRls.psql
+++ b/Model/lib/psql/webtables/UK/PANExtDbRls.psql
@@ -1,7 +1,7 @@
+:CREATE_AND_POPULATE
-
- CREATE TABLE :ORG_ABBREVPANExtDbRls AS
- SELECT distinct protocol_app_node_id as pan_id, external_database_release_id, name as dataset_name
+ SELECT distinct protocol_app_node_id as pan_id, external_database_release_id, name as dataset_name,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM (
SELECT
sl.protocol_app_node_id
@@ -12,11 +12,15 @@
, study.nodeNodeSet sl
, sres.externaldatabaserelease r
, sres.externaldatabase d
+ , apidb.datasource ds -- joined only to scope the releases to one taxon, see the three ds predicates below
WHERE
s.external_database_release_id = r.external_database_release_id
and r.external_database_id = d.external_database_id
and s.node_set_id = sl.node_set_id
and s.external_database_release_id is not null
+ and d.name = ds.external_database_name -- a datasource row is matched by database name and version
+ and r.version = ds.version
+ and ds.taxon_id = :TAXON_ID -- replaces the former name filter that was applied outside the union
UNION
SELECT pan.protocol_app_node_id
, pan.external_database_release_id
@@ -24,13 +28,15 @@
FROM study.protocolappnode pan
, sres.externaldatabaserelease r
, sres.externaldatabase d
+ , apidb.datasource ds -- same taxon scoping as the first union branch
WHERE
pan.external_database_release_id = r.external_database_release_id
and r.external_database_id = d.external_database_id
and pan.external_database_release_id is not null
+ and d.name = ds.external_database_name -- a datasource row is matched by database name and version
+ and r.version = ds.version
+ and ds.taxon_id = :TAXON_ID -- keep only releases registered for this taxon
) t
- WHERE (name = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0)
ORDER BY external_database_release_id, protocol_app_node_id
- ;
-
+:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/UK/PANIO.psql b/Model/lib/psql/webtables/UK/PANIO.psql
index fa820f9d4b..c7afcbeca5 100644
--- a/Model/lib/psql/webtables/UK/PANIO.psql
+++ b/Model/lib/psql/webtables/UK/PANIO.psql
@@ -1,6 +1,4 @@
-
-
- CREATE TABLE :ORG_ABBREVPANIO AS
+:CREATE_AND_POPULATE
SELECT DISTINCT io.*
FROM (
SELECT i.protocol_app_node_id input_pan_id, pa.protocol_app_id,
@@ -11,49 +9,16 @@
out_type.source_id as output_pan_type_source_id,
--out_type.name as output_pan_type,
out_type.ontology_term_id as output_pan_type_id
- FROM study.ProtocolApp pa, study.Input i, study.Output o,
- study.ProtocolAppNode in_pan LEFT JOIN sres.OntologyTerm in_type ON in_pan.type_id = in_type.ontology_term_id,
- study.ProtocolAppNode out_pan LEFT JOIN sres.OntologyTerm out_type ON out_pan.type_id = out_type.ontology_term_id
- WHERE i.protocol_app_id = pa.protocol_app_id
- AND o.protocol_app_id = pa.protocol_app_id
- AND i.protocol_app_node_id = in_pan.protocol_app_node_id
- AND o.protocol_app_node_id = out_pan.protocol_app_node_id
- ) io, :ORG_ABBREVpanextdbrls panExtDbRls
- WHERE io.input_pan_id = panExtDbRls.pan_id -- the input and outputs will have same dataset in prefix enabled mode only
- AND (panExtDbRls.dataset_name = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0)
+ FROM :SCHEMA.panextdbrls panExtDbRls -- driving table, narrowed to one organism in the WHERE below
+ INNER JOIN study.Input i on i.protocol_app_node_id = panExtDbRls.pan_id
+ INNER JOIN study.ProtocolApp pa on i.protocol_app_id = pa.protocol_app_id
+ INNER JOIN study.Output o on o.protocol_app_id = pa.protocol_app_id -- outputs of the same protocol app as the input
+ INNER JOIN study.ProtocolAppNode in_pan on i.protocol_app_node_id = in_pan.protocol_app_node_id
+ INNER JOIN study.ProtocolAppNode out_pan on o.protocol_app_node_id = out_pan.protocol_app_node_id
+ LEFT JOIN sres.OntologyTerm out_type ON out_pan.type_id = out_type.ontology_term_id -- LEFT JOIN keeps nodes that have no ontology type
+ LEFT JOIN sres.OntologyTerm in_type ON in_pan.type_id = in_type.ontology_term_id
+ WHERE panExtDbRls.org_abbrev = ':ORG_ABBREV'
+ ) io -- NOTE(review): confirm whether this table also needs project_id / org_abbrev / modification_date columns like the other partitioned tables
ORDER BY io.input_pan_id, io.output_pan_id
- ;
-
-
-
- create index :ORG_ABBREVpainio2_iix on :ORG_ABBREVPANIO
- (input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
-
-
- ;
-
-
-
- create index :ORG_ABBREVpainio2_oix on :ORG_ABBREVPANIO
- (output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
-
-
- ;
-
-
-
- create index :ORG_ABBREVpainio2_otypeix on :ORG_ABBREVPANIO
- (output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id)
-
-
- ;
-
-
-
- create index :ORG_ABBREVpainio2_itypeix on :ORG_ABBREVPANIO
- (input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id)
-
-
- ;
-
+:DECLARE_PARTITION;
From 099f1a08616fb7df8c9df4ca9c3f5393e579eea2 Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Wed, 14 May 2025 17:00:52 -0400
Subject: [PATCH 019/112] move or remove some psql files
---
.../webtables/{UK => MO}/PANExtDbRls.psql | 0
.../lib/psql/webtables/{UK => MO}/PANIO.psql | 0
.../lib/psql/webtables/UK/DatasetDetail.psql | 58 ------------------
.../psql/webtables/UK/DatasetPresenter.psql | 0
.../psql/webtables/UK/EupathBuildDates.psql | 0
.../UK/ExternalDbDatasetPresenter.psql | 46 --------------
.../UK/ExternalSequenceTaxonRank.psql | 60 -------------------
.../psql/webtables/UK/GeneGroupProfile.psql | 30 ----------
.../psql/webtables/UK/StudyIdDatasetId.psql | 24 --------
9 files changed, 218 deletions(-)
rename Model/lib/psql/webtables/{UK => MO}/PANExtDbRls.psql (100%)
rename Model/lib/psql/webtables/{UK => MO}/PANIO.psql (100%)
delete mode 100644 Model/lib/psql/webtables/UK/DatasetDetail.psql
delete mode 100644 Model/lib/psql/webtables/UK/DatasetPresenter.psql
delete mode 100644 Model/lib/psql/webtables/UK/EupathBuildDates.psql
delete mode 100644 Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql
delete mode 100644 Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql
delete mode 100644 Model/lib/psql/webtables/UK/GeneGroupProfile.psql
delete mode 100644 Model/lib/psql/webtables/UK/StudyIdDatasetId.psql
diff --git a/Model/lib/psql/webtables/UK/PANExtDbRls.psql b/Model/lib/psql/webtables/MO/PANExtDbRls.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/PANExtDbRls.psql
rename to Model/lib/psql/webtables/MO/PANExtDbRls.psql
diff --git a/Model/lib/psql/webtables/UK/PANIO.psql b/Model/lib/psql/webtables/MO/PANIO.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/PANIO.psql
rename to Model/lib/psql/webtables/MO/PANIO.psql
diff --git a/Model/lib/psql/webtables/UK/DatasetDetail.psql b/Model/lib/psql/webtables/UK/DatasetDetail.psql
deleted file mode 100644
index 1ec5eed7be..0000000000
--- a/Model/lib/psql/webtables/UK/DatasetDetail.psql
+++ /dev/null
@@ -1,58 +0,0 @@
-
-
- CREATE TABLE DatasetDetail AS
- SELECT dataset_presenter_id,
- name || ' ' || category || ' ' || usage || ' ' ||
- caveat || ' ' || acknowledgement || ' ' || type || ' ' || subtype
- ||' ' || summary || ' ' || description || ' ' || contact || ' ' ||
- institution || ' ' || pubmed_id || ' ' || citation as search_string
- FROM (
- SELECT
- sub.dataset_presenter_id as dataset_presenter_id,
- sub.name as name,
- sub.category as category,
- sub.usage as usage,
- sub.caveat as caveat,
- sub.acknowledgement as acknowledgement,
- sub.type as type,
- sub.subtype as subtype,
- sub.contact,
- sub.institution,
- sub.pubmed_id,
- sub.citation,
- dp.summary,
- dp.description
- FROM DatasetPresenter dp,
- (
- SELECT DISTINCT
- dp.dataset_presenter_id as dataset_presenter_id,
- dp.display_name as name,
- dp.display_category as category,
- dp.usage as usage,
- dp.caveat as caveat,
- dp.acknowledgement as acknowledgement,
- dp.type as type,
- dp.subtype as subtype,
- dc.name as contact,
- dc.affiliation as institution,
- string_agg(dpub.pmid, ' ' ORDER BY dpub.pmid) as pubmed_id,
- -- CHECK AND FIX - regexp_like ISSUE
- --string_agg(CASE WHEN REGEXP_LIKE(dpub.citation, '[[:digit:]]{4};')
- -- THEN substr(citation, 1, regexp_instr(citation, '[[:digit:]]{4};' ) - 1)
- -- ELSE dpub.citation
- -- END , ' ' ORDER BY dpub.citation) as citation
- string_agg(dpub.citation, ' ' ORDER BY dpub.citation) as citation
- FROM DatasetPresenter dp, DatasetContact dc,
- DatasetPublication dpub
- WHERE dp.dataset_presenter_id = dc.dataset_presenter_id
- AND dp.dataset_presenter_id = dpub.dataset_presenter_id
- AND dc.is_primary_contact = true
- GROUP by dp.dataset_presenter_id, dp.display_name,dp.display_category,
- dp.usage,dp.caveat,dp.acknowledgement,dp.type,dp.subtype,dc.name,
- dc.affiliation
- ) sub
- WHERE dp.dataset_presenter_id = sub.dataset_presenter_id
- ) t
-
- ;
-
diff --git a/Model/lib/psql/webtables/UK/DatasetPresenter.psql b/Model/lib/psql/webtables/UK/DatasetPresenter.psql
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/Model/lib/psql/webtables/UK/EupathBuildDates.psql b/Model/lib/psql/webtables/UK/EupathBuildDates.psql
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql b/Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql
deleted file mode 100644
index 0b8856e4e5..0000000000
--- a/Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql
+++ /dev/null
@@ -1,46 +0,0 @@
-
-
- CREATE TABLE ExternalDbDatasetPresenter AS
- SELECT ed.external_database_id, ed.name AS external_database_name,
- edr.external_database_release_id, SUBSTR(edr.version, 1, 40) AS external_database_version,
- dsp.dataset_presenter_id, dsp.name AS dataset_presenter_name,
- dsp.display_name AS dataset_presenter_display_name
- FROM sres.externalDatabaseRelease edr, sres.externalDatabase ed, DatasetPresenter dsp
- WHERE ed.external_database_id = edr.external_database_id
- AND (ed.name = dsp.name
- OR ed.name LIKE dsp.dataset_name_pattern)
- ORDER BY ed.name
-
- ;
-
-
-
- create index edd_rlsidix
- on ExternalDbDatasetPresenter
- (external_database_release_id, external_database_id, external_database_name,
- dataset_presenter_id, dataset_presenter_name, dataset_presenter_display_name)
-
-
- ;
-
-
-
- create index edd_dsidix
- on ExternalDbDatasetPresenter
- (dataset_presenter_id, external_database_id, external_database_release_id,
- external_database_name, dataset_presenter_name, dataset_presenter_display_name)
-
-
- ;
-
-
-
- create index edd_dsnameix
- on ExternalDbDatasetPresenter
- (dataset_presenter_name, dataset_presenter_id, external_database_id,
- external_database_release_id, external_database_name, external_database_version,
- dataset_presenter_display_name)
-
-
- ;
-
diff --git a/Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql b/Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql
deleted file mode 100644
index 1dd9673b23..0000000000
--- a/Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql
+++ /dev/null
@@ -1,60 +0,0 @@
-
-
- CREATE TABLE ExternalSequenceTaxonRank AS
- WITH organism_rank AS (
- SELECT tn1.taxon_id as organism, tn2.name as parent_organism,
- tn2.taxon_id as parent_organism_id, r.rank
- FROM sres.TaxonName tn1, sres.TaxonName tn2,
- (
- WITH RECURSIVE cte AS (
- SELECT taxon_id as input, taxon_id, rank, parent_id
- FROM sres.taxon
- WHERE taxon_id IN (
- SELECT taxon_id FROM dots.externalaasequence
- UNION
- SELECT taxon_id FROM apidb.taxonstring
- )
- UNION
- SELECT cte.input, t.taxon_id, t.rank, t.parent_id
- FROM sres.taxon t, cte
- WHERE cte.parent_id = t.taxon_id
- )
- SELECT input, taxon_id, rank
- FROM cte
- ) r
- WHERE r.input = tn1.taxon_id
- and r.taxon_id = tn2.taxon_id
- and tn1.name_class = 'scientific name'
- and tn2.name_class = 'scientific name'
- and r.rank in ('phylum', 'family','genus', 'species', 'superkingdom','kingdom', 'class', 'order')
- )
- SELECT organisms.organism,
- coalesce(superkingdom.parent_organism, 'N/A') as superkingdom,
- superkingdom.parent_organism_id as superkingdom_id,
- coalesce(kingdom.parent_organism, 'N/A') as kingdom,
- kingdom.parent_organism_id as kingdom_id,
- coalesce (phylum.parent_organism, 'N/A') as phylum,
- phylum.parent_organism_id as phylum_id,
- coalesce (class.parent_organism, 'N/A') as class,
- class.parent_organism_id as class_id,
- coalesce (family.parent_organism, 'N/A') as family,
- family.parent_organism_id as family_id,
- coalesce (rank_order.parent_organism, 'N/A') as rank_order,
- rank_order.parent_organism_id as rank_order_id,
- coalesce ( genus.parent_organism, 'N/A') as genus,
- genus.parent_organism_id as genus_id,
- coalesce(species.parent_organism, 'N/A') as species,
- species.parent_organism_id as species_id
- FROM (SELECT DISTINCT organism FROM organism_rank) organisms
- LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'phylum') phylum ON organisms.organism = phylum.organism
- LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'genus') genus ON organisms.organism = genus.organism
- LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'species') species ON organisms.organism = species.organism
- LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'kingdom') kingdom ON organisms.organism = kingdom.organism
- LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'superkingdom') superkingdom ON organisms.organism = superkingdom.organism
- LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'class') class ON organisms.organism = class.organism
- LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'family') family ON organisms.organism = family.organism
- LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'order') rank_order ON organisms.organism = rank_order.organism
- ORDER BY organism, species, genus
-
- ;
-
diff --git a/Model/lib/psql/webtables/UK/GeneGroupProfile.psql b/Model/lib/psql/webtables/UK/GeneGroupProfile.psql
deleted file mode 100644
index e91fd35148..0000000000
--- a/Model/lib/psql/webtables/UK/GeneGroupProfile.psql
+++ /dev/null
@@ -1,30 +0,0 @@
-
-
- create table GeneGroupProfile as
- select distinct other_gene.source_id, p.dataset_name,
- this_gene.source_id as profile_graph_id
- from OrthologousTranscripts ot
- , Profile p
- , GeneAttributes this_gene
- , GeneAttributes other_gene
- where p.source_id = ot.source_id
- and ot.source_id = this_gene.source_id
- and ot.ortho_gene_source_id = other_gene.source_id
- and this_gene.species = other_gene.species
- and ot.is_syntenic = 1
- union
- select ga.source_id, p.dataset_name, p.source_id as profile_graph_id
- from Profile p, GeneAttributes ga
- where p.source_id = ga.source_id
-
- ;
-
-
-
- create index ggp_ix
- on GeneGroupProfile
- (source_id, dataset_name, profile_graph_id)
-
-
- ;
-
diff --git a/Model/lib/psql/webtables/UK/StudyIdDatasetId.psql b/Model/lib/psql/webtables/UK/StudyIdDatasetId.psql
deleted file mode 100644
index c42d4df261..0000000000
--- a/Model/lib/psql/webtables/UK/StudyIdDatasetId.psql
+++ /dev/null
@@ -1,24 +0,0 @@
-
-
- CREATE TABLE StudyIdDatasetId AS
- SELECT s.STABLE_ID STUDY_STABLE_ID, dp.DATASET_PRESENTER_ID DATASET_ID, dp.SHORT_DISPLAY_NAME AS DATASET_SHORT_DISPLAY_NAME
- FROM EDA.STUDY s
- LEFT JOIN sres.EXTERNALDATABASERELEASE e ON s.EXTERNAL_DATABASE_RELEASE_ID =e.EXTERNAL_DATABASE_RELEASE_ID
- LEFT JOIN sres.EXTERNALDATABASE e2 ON e.EXTERNAL_DATABASE_ID =e2.EXTERNAL_DATABASE_ID
- LEFT JOIN DatasetPresenter dp on e2.name=dp.name
- -- This is TEMPORARY (used for alpha MapVEU Application)
- UNION
- select case
- when d.dataset_presenter_id = 'DS_480c976ef9' then 'VBP_MEGA'
- when d.dataset_presenter_id = 'DS_e18287e335' then '2023-maine-ricinus'
- when d.dataset_presenter_id = 'DS_2b98dd44ab' then '2010-Neafsey-M-S-Bamako'
- else 'NA' end as study_stable_id,
- d.dataset_presenter_id as dataset_id, d.short_display_name as dataset_short_display_name
- from DatasetPresenter d
- where d.dataset_presenter_id in (
- 'DS_480c976ef9',
- 'DS_e18287e335'
- )
-
- ;
-
From ec5bf01e1e3f8ab8ae87b5d19193d485dfb88836 Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Wed, 14 May 2025 17:01:05 -0400
Subject: [PATCH 020/112] wip
---
.../webtables/MO/DatasetExampleSourceId.psql | 6 +-
.../psql/webtables/MO/OrganismAttributes.psql | 91 ++++++++++---------
Model/lib/psql/webtables/MO/PANIO_ix.psql | 29 ++++++
.../psql/webtables/MO/ProteinAttributes.psql | 34 +++----
Model/lib/psql/webtables/MO/RnaSeqStats.psql | 13 +--
Model/lib/xml/tuningManager/tablePruning.txt | 16 ++--
Model/lib/xml/tuningManager/webtables.org | 36 ++++----
7 files changed, 131 insertions(+), 94 deletions(-)
create mode 100644 Model/lib/psql/webtables/MO/PANIO_ix.psql
diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql
index 3d10c6319c..4e01371739 100644
--- a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql
+++ b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql
@@ -5,18 +5,18 @@
d.name,
row_number() over(partition by d.name
order by ga.chromosome_order_num, p.profile_as_string desc) as rn
- FROM Profile p
+ FROM :SCHEMA.Profile p
INNER JOIN sres.ExternalDatabase d ON p.dataset_name = d.name
LEFT JOIN :SCHEMA.GeneAttributes ga ON p.source_id = ga.source_id
WHERE p.profile_as_string is not null
- and ga.org_abbrev = ':ORG_ABBREV'
+ and ga.org_abbrev = ':ORG_ABBREV' -- NOTE(review): a WHERE filter on ga drops the unmatched rows of the LEFT JOIN above, so the join acts as an inner join - confirm that is intended
+ and p.org_abbrev = ':ORG_ABBREV'
)
SELECT
':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
p.source_id as example_source_id, p.sequence_id, p.name as dataset
FROM profiles p
WHERE p.rn = 1
-
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
index 1f68de2f75..bda5e8a2b3 100644
--- a/Model/lib/psql/webtables/MO/OrganismAttributes.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
@@ -114,45 +114,47 @@
then p.source_id
else ''
end)) as geneArrayCount
- FROM Profile p
- RIGHT OUTER JOIN :SCHEMA.GeneAttributes ga ON ga.source_id = p.source_id and ga.org_abbrev = ':ORG_ABBREV'
+ FROM :SCHEMA.Profile p
+ RIGHT OUTER JOIN :SCHEMA.GeneAttributes ga ON ga.source_id = p.source_id
+ and p.org_abbrev = ':ORG_ABBREV' -- kept in ON because in WHERE it would discard genes without a profile row and turn the outer join into an inner join
+ WHERE ga.org_abbrev = ':ORG_ABBREV' -- ga is the preserved side, so its filter belongs in WHERE
GROUP BY ga.taxon_id
;
- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVPopsetCount AS
- SELECT count(distinct gene.source_id) as popsetCount, sim.taxon_id
- FROM (
- (SELECT i.source_id, nas.taxon_id, nas.source_id as sequence_source_id
- FROM dots.similarity s, PopsetAttributes i,
- core.tableinfo t, dots.nasequence nas
- WHERE s.query_id = i.na_sequence_id
- AND nas.na_sequence_id = s.subject_id
- AND t.table_id = s.subject_table_id
- AND t.table_id = s.query_table_id
- AND t.name = 'ExternalNASequence'
- AND s.pvalue_exp <= -10
- and nas.taxon_id = :TAXON_ID
- ) sim LEFT JOIN
- (SELECT i.source_id, seq.source_id as sequence_id
- FROM dots.similarity s, PopsetAttributes i, GeneAttributes g,
- core.tableinfo t, dots.nasequence seq
- WHERE s.query_id = i.na_sequence_id
- AND s.subject_id = g.na_sequence_id
- AND t.table_id = s.subject_table_id
- AND t.table_id = s.query_table_id
- AND s.min_subject_start <= g.end_max
- AND s.max_subject_end >= g.start_min
- AND g.na_sequence_id = seq.na_sequence_id
- AND t.name = 'ExternalNASequence'
- and seq.taxon_id = :TAXON_ID
- ) gene
- ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id)
- GROUP BY sim.taxon_id
-
- ;
+ -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVPopsetCount AS
+ -- SELECT count(distinct gene.source_id) as popsetCount, sim.taxon_id
+ -- FROM (
+ -- (SELECT i.source_id, nas.taxon_id, nas.source_id as sequence_source_id
+ -- FROM dots.similarity s, PopsetAttributes i,
+ -- core.tableinfo t, dots.nasequence nas
+ -- WHERE s.query_id = i.na_sequence_id
+ -- AND nas.na_sequence_id = s.subject_id
+ -- AND t.table_id = s.subject_table_id
+ -- AND t.table_id = s.query_table_id
+ -- AND t.name = 'ExternalNASequence'
+ -- AND s.pvalue_exp <= -10
+ -- and nas.taxon_id = :TAXON_ID
+ -- ) sim LEFT JOIN
+ -- (SELECT i.source_id, seq.source_id as sequence_id
+ -- FROM dots.similarity s, PopsetAttributes i, GeneAttributes g,
+ -- core.tableinfo t, dots.nasequence seq
+ -- WHERE s.query_id = i.na_sequence_id
+ -- AND s.subject_id = g.na_sequence_id
+ -- AND t.table_id = s.subject_table_id
+ -- AND t.table_id = s.query_table_id
+ -- AND s.min_subject_start <= g.end_max
+ -- AND s.max_subject_end >= g.start_min
+ -- AND g.na_sequence_id = seq.na_sequence_id
+ -- AND t.name = 'ExternalNASequence'
+ -- and seq.taxon_id = :TAXON_ID
+ -- ) gene
+ -- ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id)
+ -- GROUP BY sim.taxon_id
+
+ -- ;
@@ -195,8 +197,8 @@
then ga.source_id
else NULL
end)),0) ecNumberCount
- FROM GeneAttributes ga
- LEFT OUTER JOIN apidb.phylogeneticprofile pp on ga.source_id = pp.source_id
+ FROM :SCHEMA.GeneAttributes ga
+ LEFT OUTER JOIN apidb.phylogeneticprofile pp on ga.source_id = pp.source_id and ga.org_abbrev = ':ORG_ABBREV' -- NOTE(review): a ga predicate inside this ON clause only stops other organisms matching pp, it does not remove their ga rows - confirm a WHERE on ga.org_abbrev exists further down
LEFT OUTER JOIN :SCHEMA.gotermsummary gts on ga.source_id = gts.gene_source_id and gts.org_abbrev = ':ORG_ABBREV'
LEFT OUTER JOIN :SCHEMA.TFBSGene tfbs on ga.source_id = tfbs.gene_source_id and tfbs.org_abbrev = ':ORG_ABBREV'
LEFT OUTER JOIN :SCHEMA.TranscriptAttributes ta on ta.gene_source_id = ga.source_id and ta.org_abbrev = ':ORG_ABBREV'
@@ -293,37 +295,38 @@
coalesce(sc.supercont_num, 0) as supercontigCount,
coalesce(sc.chrom_num, 0) as chromosomeCount,
coalesce(cc.communityCount, 0) as communityCount,
- coalesce(psc.popsetCount, 0) as popsetCount,
+ --coalesce(psc.popsetCount, 0) as popsetCount,
coalesce(pc.geneArrayCount, 0) as arrayGeneCount,
coalesce(pc.rnaSeqCount, 0) as rnaSeqCount,
coalesce(pc.rtPCRCount, 0) as rtPCRCount,
coalesce(ta.avg_transcript_length, 0) as avg_transcript_length
FROM apidb.Organism o
- INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id
+ INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id and o.taxon_id = :TAXON_ID
INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id
LEFT JOIN :SCHEMA.:ORG_ABBREVDataSourceCount dsc ON o.taxon_id = dsc.taxon_id
LEFT JOIN :SCHEMA.:ORG_ABBREVOrganismCentromere oc ON o.taxon_id = oc.taxon_id
LEFT JOIN :SCHEMA.:ORG_ABBREVSequenceCount sc ON o.taxon_id = sc.taxon_id
LEFT JOIN :SCHEMA.:ORG_ABBREVCommunityCount cc ON o.taxon_id = cc.taxon_id
LEFT JOIN :SCHEMA.:ORG_ABBREVGeneCount gc ON o.taxon_id = gc.taxon_id
- LEFT JOIN :SCHEMA.:ORG_ABBREVpopsetCount psc ON o.taxon_id = psc.taxon_id
+ --LEFT JOIN :SCHEMA.:ORG_ABBREVpopsetCount psc ON o.taxon_id = psc.taxon_id
LEFT JOIN :SCHEMA.:ORG_ABBREVprofileCount pc ON o.taxon_id = pc.taxon_id
LEFT JOIN (
SELECT taxon_id, round(avg(length),1) as avg_transcript_length
- FROM TranscriptAttributes
- where org_abbrev = ':ORG_ABBREV'
- GROUP by taxon_id
+ FROM :SCHEMA.TranscriptAttributes
+ where org_abbrev = ':ORG_ABBREV'
+ GROUP by taxon_id
) ta ON o.taxon_id = ta.taxon_id
WHERE tn.name_class = 'scientific name'
) oa,
- TaxonSpecies ts,
+ :SCHEMA.TaxonSpecies ts,
sres.taxon t,
sres.taxonname tn2
WHERE oa.component_taxon_id = ts.taxon_id
AND ts.species_taxon_id = t.taxon_id
AND ts.species_taxon_id = tn2.taxon_id
AND tn2.name_class = 'scientific name'
- and o.taxon_id = :TAXON_ID
+ AND o.taxon_id = :TAXON_ID
+ AND ts.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
@@ -331,7 +334,7 @@
drop table :SCHEMA.:ORG_ABBREVDataSourceCount;
drop table :SCHEMA.:ORG_ABBREVOrganismCentromere;
drop table :SCHEMA.:ORG_ABBREVProfileCount;
-drop table :SCHEMA.:ORG_ABBREVPopsetCount;
+--drop table :SCHEMA.:ORG_ABBREVPopsetCount;
drop table :SCHEMA.:ORG_ABBREVGeneCount;
drop table :SCHEMA.:ORG_ABBREVSequenceCount;
drop table :SCHEMA.:ORG_ABBREVCommunityCount;
diff --git a/Model/lib/psql/webtables/MO/PANIO_ix.psql b/Model/lib/psql/webtables/MO/PANIO_ix.psql
new file mode 100644
index 0000000000..90176e14d7
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/PANIO_ix.psql
@@ -0,0 +1,29 @@
+ create index :SCHEMA.painio2_iix on :SCHEMA.PANIO
+ (input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
+
+
+ ;
+
+
+
+ create index :SCHEMA.painio2_oix on :SCHEMA.PANIO
+ (output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
+
+
+ ;
+
+
+
+ create index :SCHEMA.painio2_otypeix on :SCHEMA.PANIO
+ (output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id)
+
+
+ ;
+
+
+
+ create index :SCHEMA.painio2_itypeix on :SCHEMA.PANIO
+ (input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id)
+
+
+ ;
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webtables/MO/ProteinAttributes.psql
index ad6c7cd95b..645a1edfae 100644
--- a/Model/lib/psql/webtables/MO/ProteinAttributes.psql
+++ b/Model/lib/psql/webtables/MO/ProteinAttributes.psql
@@ -102,20 +102,20 @@
;
-
- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp AS
- SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived
- FROM (SELECT DISTINCT asec.aa_sequence_id,
- ec.ec_number || ' (' || ec.description || ')' AS ec_number
- FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec, dots.aasequence seq
- WHERE ec.enzyme_class_id = asec.enzyme_class_id
- AND seq.aa_sequence_id = asec.aa_sequence_id
- AND seq.taxon_id = :TAXON_ID
- AND asec.evidence_code = 'OrthoMCLDerived'
- ) t
- GROUP BY aa_sequence_id
-
- ;
+ --TODO: these rows will not exist in org specific land
+ -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp AS
+ -- SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived
+ -- FROM (SELECT DISTINCT asec.aa_sequence_id,
+ -- ec.ec_number || ' (' || ec.description || ')' AS ec_number
+ -- FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec, dots.aasequence seq
+ -- WHERE ec.enzyme_class_id = asec.enzyme_class_id
+ -- AND seq.aa_sequence_id = asec.aa_sequence_id
+ -- AND seq.taxon_id = :TAXON_ID
+ -- AND asec.evidence_code = 'OrthoMCLDerived'
+ -- ) t
+ -- GROUP BY aa_sequence_id
+
+ -- ;
-- TODO: Filter the subqueries or break into tmp tables for performance
@@ -139,7 +139,7 @@
asa.aromaticity_score,
SUBSTR(sigp.peptide_sequence, 1, 200) as signalp_peptide,
ec_numbers,
- ec_numbers_derived,
+ --ec_numbers_derived,
go.annotated_go_component,
go.annotated_go_function,
go.annotated_go_process,
@@ -186,7 +186,7 @@
GROUP BY tms.aa_sequence_id
) transmembrane ON tas.aa_sequence_id = transmembrane.aa_sequence_id
LEFT JOIN :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp ec ON tas.aa_sequence_id = ec.aa_sequence_id
- LEFT JOIN :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id
+ --LEFT JOIN :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id
LEFT JOIN (
SELECT af.aa_sequence_id,
string_agg(dbref.primary_identifier, ',' order by dbref.primary_identifier) as uniprot_ids
@@ -215,4 +215,4 @@
drop table :SCHEMA.:ORG_ABBREVGoTermList_tmp;
drop table :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp;
drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp;
-drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp;
+--drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp;
diff --git a/Model/lib/psql/webtables/MO/RnaSeqStats.psql b/Model/lib/psql/webtables/MO/RnaSeqStats.psql
index 33964a796c..8945fa2c92 100644
--- a/Model/lib/psql/webtables/MO/RnaSeqStats.psql
+++ b/Model/lib/psql/webtables/MO/RnaSeqStats.psql
@@ -1,8 +1,6 @@
:CREATE_AND_POPULATE
-
-
- create table RnaSeqStats as
- select study_id, study_name, dataset_name, taxon_id, round(avg(num_reads::integer),0) as avg_unique_reads
+ select study_id, study_name, dataset_name, taxon_id, round(avg(num_reads::integer),0) as avg_unique_reads,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
from (select sl.node_set_id as study_id
, s.name || '[' || s.node_type || ']' as study_name
, ed.name as dataset_name
@@ -23,10 +21,12 @@
and pan.protocol_app_node_id = c.protocol_app_node_id
and c.qualifier_id = ot.ontology_term_id
and (ot.source_id = 'EUPATH_0000460' or ot.source_id = 'EuPathUserDefined_00507')
- ) subquery1
+ and ds.taxon_id = :TAXON_ID
+ ) subquery1
group by study_id, study_name, dataset_name, taxon_id
union
- select study_id, study_name, dataset_name, taxon_id, round(2*avg(num_reads::integer),0) as avg_unique_reads
+ select study_id, study_name, dataset_name, taxon_id, round(2*avg(num_reads::integer),0) as avg_unique_reads,
+ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
from (select sl.node_set_id as study_id
, s.name || '[' || s.node_type || ']' as study_name
, ed.name as dataset_name
@@ -47,6 +47,7 @@
and pan.protocol_app_node_id = c.protocol_app_node_id
and c.qualifier_id = ot.ontology_term_id
and (ot.source_id = 'EUPATH_0000468' or ot.source_id = 'EuPathUserDefined_00515' or ot.source_id = 'EUPATH_0000476' or ot.source_id = 'EuPathUserDefined_00523')
+ and ds.taxon_id = :TAXON_ID
) subquery2
group by study_id, study_name, dataset_name, taxon_id
diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt
index aa94aa6da9..d7d5687d76 100644
--- a/Model/lib/xml/tuningManager/tablePruning.txt
+++ b/Model/lib/xml/tuningManager/tablePruning.txt
@@ -7,9 +7,9 @@ K
R
??
K
-MO
+MC Should rename this "ProteinSequenceGroup"
R
-MO
+K Need to confirm with Rich but this should be handled now with the new interpro table (or tt)
MO
??
MO
@@ -33,9 +33,9 @@ MO
MO
K
MO (need BOTH org specific version and global -- WHY???)
-MO
+K
MO
-MO (Comment column needs to be made into a dedicated attribute query)
+K (Comment column needs to be made into a dedicated attribute query)
MO
MO
MO
@@ -107,9 +107,9 @@ K
MO
K
K
-MO (need to look at this)
-MO (need to look at this)
-MO (need to look at this)
+K (need to look at this)
+K (need to look at this)
+K (need to look at this)
K
MO
MO
@@ -117,7 +117,7 @@ R
MO
R
MC (could be put into the orthomcl graph. Rich and John should look)
-MC (similar to OrthologousTranscripts)
+K (similar to OrthologousTranscripts)
R
R
MO (PANIO will need to exist)
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index 2ffa4f7f3f..fa0ecc2bde 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -100,23 +100,27 @@
- [X] EstAlignmentGeneSummary.psql
- Dataset / Other
- - [s] DatasetExampleSourceId_ix.psql
- - [s] DatasetExampleSourceId.psql
- - [ ] PANIO.psql
- - [ ] PANIO_ix.psql
- - [ ] Profile_ix.psql
- - [ ] Profile.psql
- - [ ] ProfileSamples_ix.psql
- - [ ] ProfileSamples.psql
- - [ ] ProfileType_ix.psql
- - [ ] ProfileType.psql
- - [ ] RnaSeqStats_ix.psql
- - [ ] RnaSeqStats.psql
- - [s] OrganismAttributes_ix.psql
- - [s] OrganismAttributes.psql
+ - [X] DatasetExampleSourceId_ix.psql
+ - [X] DatasetExampleSourceId.psql
+ - NOTE: this depends on Profiles
+ - [X] PANExtDbRls.psql
+ - [X] PANIO.psql
+ - [X] PANIO_ix.psql
+
+ - [ ] +ProfileType_ix.psql+
+ - [ ] +ProfileType.psql+
+ - [ ] +Profile_ix.psql+
+ - [ ] +Profile.psql+
+ - [ ] +ProfileSamples_ix.psql+
+ - [ ] +ProfileSamples.psql+
+
+ - [X] RnaSeqStats_ix.psql
+ - [X] RnaSeqStats.psql
+ - [X] OrganismAttributes_ix.psql
+ - [X] OrganismAttributes.psql
- removed ESTs and SNPs
- - [s] ChrCopyNumbers_ix.psql
- - [s] ChrCopyNumbers.psql
+ - [X] ChrCopyNumbers_ix.psql
+ - [X] ChrCopyNumbers.psql
- Junctions (Kathryn)
- [ ] IntronSupportLevel_ix.psql
From cd9c52dcc202297143c472708c3ecfd040863a10 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 16 May 2025 15:55:41 -0400
Subject: [PATCH 021/112] debug
---
Model/lib/psql/webtables/MO/GeneLocations_ix.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
index 135dfff694..6839eea3dc 100644
--- a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
@@ -1,4 +1,4 @@
- create :SCHEMA.index gloc_ix
+ create index :SCHEMA.gloc_ix
on :SCHEMA.GeneLocations (source_id, locations)
;
From 82793752dae638b7029693f7751413b3d37279e5 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 16 May 2025 16:05:20 -0400
Subject: [PATCH 022/112] debug
---
Model/lib/psql/webtables/MO/GeneLocations_ix.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
index 6839eea3dc..006a1a1dff 100644
--- a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.gloc_ix
+ create index gloc_ix
on :SCHEMA.GeneLocations (source_id, locations)
;
From 70888238a5d3ea2d810497845f66d66d58bdb245 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 16 May 2025 16:29:37 -0400
Subject: [PATCH 023/112] correct indexes
---
.../webtables/MG/CompoundAttributes_ix.psql | 2 +-
.../lib/psql/webtables/MG/CompoundId_ix.psql | 2 +-
.../webtables/MG/GroupDomainAttribute_ix.psql | 2 +-
.../psql/webtables/MG/OntologyLevels_ix.psql | 2 +-
.../webtables/MG/PathwayAttributes_ix.psql | 4 +-
.../webtables/MG/PathwayCompounds_ix.psql | 2 +-
.../webtables/MG/PathwayReactions_ix.psql | 2 +-
.../webtables/MO/ChIPchipTranscript_ix.psql | 2 +-
.../psql/webtables/MO/ChrCopyNumbers_ix.psql | 4 +-
.../psql/webtables/MO/CodingSequence_ix.psql | 2 +-
Model/lib/psql/webtables/MO/EqtlSpan_ix.psql | 2 +-
.../MO/EstAlignmentGeneSummary_ix.psql | 4 +-
.../psql/webtables/MO/EstAttributes_ix.psql | 2 +-
.../lib/psql/webtables/MO/EstSequence_ix.psql | 2 +-
.../psql/webtables/MO/GeneAttributes_ix.psql | 22 +++++------
.../psql/webtables/MO/GeneCopyNumbers_ix.psql | 2 +-
.../lib/psql/webtables/MO/GeneGoTable_ix.psql | 2 +-
.../webtables/MO/GeneIntJuncStats_ix.psql | 2 +-
.../webtables/MO/GeneIntronJunction_ix.psql | 6 +--
.../webtables/MO/GeneMaxIntronGIJ_ix.psql | 2 +-
.../psql/webtables/MO/GeneModelDump_ix.psql | 2 +-
.../webtables/MO/GenomicSeqAttributes_ix.psql | 8 ++--
.../webtables/MO/GenomicSequenceId_ix.psql | 6 +--
.../MO/GenomicSequenceSequence_ix.psql | 2 +-
.../psql/webtables/MO/GoTermSummary_ix.psql | 4 +-
.../psql/webtables/MO/NameMappingGIJ_ix.psql | 2 +-
.../webtables/MO/OrganismAttributes_ix.psql | 2 +-
Model/lib/psql/webtables/MO/PANIO_ix.psql | 8 ++--
.../webtables/MO/PathwaysGeneTable_ix.psql | 2 +-
.../psql/webtables/MO/ProfileSamples_ix.psql | 4 +-
Model/lib/psql/webtables/MO/Profile_ix.psql | 6 +--
.../webtables/MO/ProteinAttributes_ix.psql | 4 +-
.../psql/webtables/MO/ProteinSequence_ix.psql | 2 +-
.../webtables/MO/SequenceEnzymeClass_ix.psql | 4 +-
.../webtables/MO/SignalPeptideDomains_ix.psql | 4 +-
Model/lib/psql/webtables/MO/TFBSGene_ix.psql | 4 +-
Model/lib/psql/webtables/MO/Taxonomy_ix.psql | 2 +-
.../webtables/MO/TranscriptAttributes_ix.psql | 38 +++++++++----------
.../MO/TranscriptCenDistance_ix.psql | 2 +-
.../webtables/MO/TranscriptPathway_ix.psql | 4 +-
.../webtables/MO/TranscriptSequence_ix.psql | 2 +-
.../webtables/MO/TransmembraneDomains_ix.psql | 2 +-
42 files changed, 92 insertions(+), 92 deletions(-)
diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql b/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql
index a16c042e5b..697149da51 100644
--- a/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql
@@ -1,2 +1,2 @@
- CREATE INDEX :SCHEMA.CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id)
+ CREATE INDEX CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id)
;
diff --git a/Model/lib/psql/webtables/MG/CompoundId_ix.psql b/Model/lib/psql/webtables/MG/CompoundId_ix.psql
index 217b020bac..20a155ef65 100644
--- a/Model/lib/psql/webtables/MG/CompoundId_ix.psql
+++ b/Model/lib/psql/webtables/MG/CompoundId_ix.psql
@@ -1,2 +1,2 @@
- CREATE INDEX :SCHEMA.CompoundId_idx ON :SCHEMA.CompoundId (id, compound)
+ CREATE INDEX CompoundId_idx ON :SCHEMA.CompoundId (id, compound)
;
diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql b/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql
index 4112a31ce3..b796bb25e4 100644
--- a/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql
+++ b/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql
@@ -1,2 +1,2 @@
-CREATE INDEX SCHEMA.GroupDomainAttribute_idx ON SCHEMA.GroupDomainAttribute (group_name)
+CREATE INDEX GroupDomainAttribute_idx ON :SCHEMA.GroupDomainAttribute (group_name)
;
diff --git a/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql b/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql
index 708dc47e5a..5f95d3889a 100644
--- a/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql
+++ b/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql
@@ -1,2 +1,2 @@
- create index :SCHEMA.olev_termix on :SCHEMA.OntologyLevels (ontology_term_id, min_depth, max_depth)
+ create index olev_termix on :SCHEMA.OntologyLevels (ontology_term_id, min_depth, max_depth)
;
diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql b/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql
index 99f50c5a08..a5c4902674 100644
--- a/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql
@@ -1,7 +1,7 @@
- CREATE UNIQUE INDEX :SCHEMA.PathAttr_sourceId_pwaySrc
+ CREATE UNIQUE INDEX PathAttr_sourceId_pwaySrc
ON :SCHEMA.PathwayAttributes (source_id, pathway_source)
;
- create index :SCHEMA.PathAttr_ix
+ create index PathAttr_ix
on :SCHEMA.PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count)
;
diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql b/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql
index fbcdfa72e1..3aae4dbb58 100644
--- a/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql
+++ b/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.PthCmpd_id_ix
+ create index PthCmpd_id_ix
on :SCHEMA.PathwayCompounds (pathway_id, reaction_id, ext_db_name)
;
diff --git a/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql b/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql
index e0ed73978b..50dd09d63b 100644
--- a/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql
+++ b/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.PathRcts_id_ix
+ create index PathRcts_id_ix
on :SCHEMA.PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name)
;
diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
index bd8aaf4411..11d4fa447f 100644
--- a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
+++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.chpgene_geneid_idx ON :SCHEMA.ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id)
+ create index chpgene_geneid_idx ON :SCHEMA.ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id)
;
diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
index 9d0e711555..4f989590a9 100644
--- a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
+++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
@@ -1,9 +1,9 @@
- CREATE INDEX :SCHEMA.ChrCN_ix
+ CREATE INDEX ChrCN_ix
ON :SCHEMA.ChrCopyNumbers (input_pan_id, na_sequence_id)
;
- CREATE INDEX :SCHEMA.ChrCN_output
+ CREATE INDEX ChrCN_output
ON :SCHEMA.ChrCopyNumbers (output_pan_id)
;
diff --git a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
index 7d3114121f..e580f236fd 100644
--- a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.CodSeq_ix on :SCHEMA.CodingSequence (source_id, project_id)
+ create index CodSeq_ix on :SCHEMA.CodingSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
index f4babf6466..9238b045e6 100644
--- a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
+++ b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.eqtlSpan_ix
+ create index eqtlSpan_ix
on :SCHEMA.eqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score)
;
diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql
index 6dec9178b5..adac5ecebd 100644
--- a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql
+++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql
@@ -1,7 +1,7 @@
create index EstSumm_libOverlap_ix
- ON EstAlignmentGeneSummary
+ ON :SCHEMA.EstAlignmentGeneSummary
(library_id, percent_identity, is_consistent,
est_gene_overlap_length, percent_est_bases_aligned)
@@ -11,7 +11,7 @@
create index EstSumm_estSite_ix
- ON EstAlignmentGeneSummary
+ ON :SCHEMA.EstAlignmentGeneSummary
(target_sequence_source_id, target_start, target_end,
library_id)
diff --git a/Model/lib/psql/webtables/MO/EstAttributes_ix.psql b/Model/lib/psql/webtables/MO/EstAttributes_ix.psql
index 3708681405..eed35a9960 100644
--- a/Model/lib/psql/webtables/MO/EstAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/EstAttributes_ix.psql
@@ -1,6 +1,6 @@
- create unique index EstAttr_source_id ON EstAttributes (source_id)
+ create unique index EstAttr_source_id ON :SCHEMA.EstAttributes (source_id)
;
diff --git a/Model/lib/psql/webtables/MO/EstSequence_ix.psql b/Model/lib/psql/webtables/MO/EstSequence_ix.psql
index de699f5486..d124b417bc 100644
--- a/Model/lib/psql/webtables/MO/EstSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/EstSequence_ix.psql
@@ -1,3 +1,3 @@
- create index EstSeq_ix on EstSequence (source_id, project_id)
+ create index EstSeq_ix on :SCHEMA.EstSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
index bd524c4b7a..9f45a0abad 100644
--- a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
@@ -1,47 +1,47 @@
- CREATE UNIQUE INDEX :SCHEMA.GeneAttr_srcPrj
+ CREATE UNIQUE INDEX GeneAttr_srcPrj
ON :SCHEMA.GeneAttributes (source_id)
;
- CREATE INDEX :SCHEMA.GeneAttr_exon_ix
+ CREATE INDEX GeneAttr_exon_ix
ON :SCHEMA.GeneAttributes (exon_count, source_id, project_id)
;
- CREATE INDEX :SCHEMA.GeneAttr_loc_ix
+ CREATE INDEX GeneAttr_loc_ix
ON :SCHEMA.GeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated)
;
- CREATE INDEX :SCHEMA.GeneAttr_feat_ix
+ CREATE INDEX GeneAttr_feat_ix
ON :SCHEMA.GeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed)
;
- CREATE INDEX :SCHEMA.GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes (
+ CREATE INDEX GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes (
orthomcl_name, source_id, taxon_id, gene_type, na_feature_id,
na_sequence_id, start_min, end_max, organism, species,
product, project_id
)
;
- CREATE INDEX :SCHEMA.GeneAttr_ortholog_ix
+ CREATE INDEX GeneAttr_ortholog_ix
ON :SCHEMA.GeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id)
;
- CREATE INDEX :SCHEMA.GeneAttr_orgsrc_ix
+ CREATE INDEX GeneAttr_orgsrc_ix
ON :SCHEMA.GeneAttributes (organism, source_id, na_sequence_id, start_min, end_max)
;
- CREATE INDEX :SCHEMA.GeneAttr_prjsrc_ix
+ CREATE INDEX GeneAttr_prjsrc_ix
ON :SCHEMA.GeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0))
;
- CREATE INDEX :SCHEMA.GeneAttr_txid_ix
+ CREATE INDEX GeneAttr_txid_ix
ON :SCHEMA.GeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id)
;
- CREATE INDEX :SCHEMA.GeneAttr_ids_ix
+ CREATE INDEX GeneAttr_ids_ix
ON :SCHEMA.GeneAttributes (na_feature_id, source_id, project_id)
;
- CREATE INDEX :SCHEMA.GeneAttr_loc_intjunc_ix
+ CREATE INDEX GeneAttr_loc_intjunc_ix
ON :SCHEMA.GeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX)
;
diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
index 4cb38aaa7f..084742ec07 100644
--- a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
@@ -1,4 +1,4 @@
- CREATE INDEX :SCHEMA.GeneCN_ix
+ CREATE INDEX GeneCN_ix
ON :SCHEMA.GeneCopyNumbers (input_pan_id, na_sequence_id)
;
diff --git a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
index a2ef5c49ec..a065517030 100644
--- a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.ggtab_ix ON :SCHEMA.GeneGoTable
+ create index ggtab_ix ON :SCHEMA.GeneGoTable
(source_id, project_id, go_id, transcript_ids, is_not, go_term_name,
ontology, source, evidence_code, reference, evidence_code_parameter, sort_key)
;
diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
index c295007639..89389667a0 100644
--- a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id)
+ create index GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id)
;
diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
index 14c8fdbd4a..3c0cff5069 100644
--- a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
@@ -1,10 +1,10 @@
- create index :SCHEMA.gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
+ create index gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
;
- create index :SCHEMA.gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id)
+ create index gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id)
;
- create index :SCHEMA.gijnew_txnloc_ix
+ create index gijnew_txnloc_ix
on :SCHEMA.GeneIntronJunction
(taxon_id, na_sequence_id, segment_start, segment_end, is_reversed,
total_unique, total_isrpm, annotated_intron)
diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
index 927c41fb89..9bd71417ae 100644
--- a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
@@ -1,3 +1,3 @@
- CREATE INDEX :SCHEMA.GnMxIntGIJ_ix on :SCHEMA.GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id)
+ CREATE INDEX GnMxIntGIJ_ix on :SCHEMA.GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id)
;
diff --git a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
index 25fc836319..e4388deb01 100644
--- a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
+++ b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.gmd_ix
+ create index gmd_ix
on :SCHEMA.GeneModelDump
(source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids)
;
diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
index 709e8d2934..039749930e 100644
--- a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
@@ -1,11 +1,11 @@
- create unique index :SCHEMA.pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id)
+ create unique index pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id)
;
- create unique index :SCHEMA.SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id)
+ create unique index SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id)
;
- create unique index :SCHEMA.SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id)
+ create unique index SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id)
;
- create unique index :SCHEMA.SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id)
+ create unique index SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id)
;
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
index 093edf9acb..0cfa5601ed 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
@@ -1,9 +1,9 @@
- CREATE INDEX :SCHEMA.GenSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id)
+ CREATE INDEX genSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id)
;
- CREATE INDEX :SCHEMA.GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence)
+ CREATE INDEX GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence)
;
- CREATE INDEX :SCHEMA.GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence)
+ CREATE INDEX GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence)
;
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
index ea41f21285..497278e368 100644
--- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id)
+ create index GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
index 0b0ef12e90..8c5134b475 100644
--- a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
+++ b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
@@ -1,7 +1,7 @@
- create index :SCHEMA.GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source)
+ create index GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source)
;
- create index :SCHEMA.GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary
+ create index GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary
(ontology, gene_source_id, is_not, is_go_slim,
go_id, go_term_name, evidence_code, evidence_category)
;
diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
index 443efe39c8..4245fce828 100644
--- a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
+++ b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id)
+ create index namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id)
;
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
index b018637f7d..d1f420c0df 100644
--- a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
@@ -1,3 +1,3 @@
-create unique index :SCHEMA.Organism_sourceId_idx ON :SCHEMA.OrganismAttributes (source_id)
+create unique index Organism_sourceId_idx ON :SCHEMA.OrganismAttributes (source_id)
;
diff --git a/Model/lib/psql/webtables/MO/PANIO_ix.psql b/Model/lib/psql/webtables/MO/PANIO_ix.psql
index 90176e14d7..9bbb34ec21 100644
--- a/Model/lib/psql/webtables/MO/PANIO_ix.psql
+++ b/Model/lib/psql/webtables/MO/PANIO_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.painio2_iix on :SCHEMA.PANIO
+ create index painio2_iix on :SCHEMA.PANIO
(input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
@@ -6,7 +6,7 @@
- create index :SCHEMA.painio2_oix on :SCHEMA.PANIO
+ create index painio2_oix on :SCHEMA.PANIO
(output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
@@ -14,7 +14,7 @@
- create index :SCHEMA.painio2_otypeix on :SCHEMA.PANIO
+ create index painio2_otypeix on :SCHEMA.PANIO
(output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id)
@@ -22,7 +22,7 @@
- create index :SCHEMA.painio2_itypeix on :SCHEMA.PANIO
+ create index painio2_itypeix on :SCHEMA.PANIO
(input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id)
diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
index 2cc01784ca..40750a7f84 100644
--- a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
+++ b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.pgt_ix on :SCHEMA.PathwaysGeneTable
+ create index pgt_ix on :SCHEMA.PathwaysGeneTable
(gene_source_id, project_id, pathway_source_id, pathway_name,
reactions, enzyme, expasy_url, pathway_source, exact_match)
diff --git a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
index fceecb4adf..d59cf5e21b 100644
--- a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
+++ b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
@@ -1,11 +1,11 @@
- create :SCHEMA.index psamp_ix
+ create index psamp_ix
on :SCHEMA.ProfileSamples
(dataset_name, profile_type, study_id, node_order_num,
protocol_app_node_id, profile_set_suffix, study_name,
node_type, protocol_app_node_name)
;
- create index :SCHEMA.psampstdy_ix
+ create index psampstdy_ix
on :SCHEMA.ProfileSamples
(study_name, node_type, profile_type, node_order_num,
protocol_app_node_id, profile_set_suffix, study_id,
diff --git a/Model/lib/psql/webtables/MO/Profile_ix.psql b/Model/lib/psql/webtables/MO/Profile_ix.psql
index e4aebd5c31..46b746ade0 100644
--- a/Model/lib/psql/webtables/MO/Profile_ix.psql
+++ b/Model/lib/psql/webtables/MO/Profile_ix.psql
@@ -1,12 +1,12 @@
- create index :SCHEMA.exprof_idx
+ create index exprof_idx -- unqualified index name, the index always lives in the schema of its table
on :SCHEMA.Profile (source_id, profile_type, profile_set_name)
;
- create index :SCHEMA.profset_idx
+ create index profset_idx
on :SCHEMA.Profile (profile_set_name, profile_type)
;
- create index :SCHEMA.srcdset_idx
+ create index srcdset_idx
on :SCHEMA.Profile (source_id, dataset_subtype, dataset_type)
;
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
index 2faff407d3..3abf032197 100644
--- a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
@@ -1,6 +1,6 @@
- CREATE INDEX :SCHEMA.PA_sourceId ON :SCHEMA.ProteinAttributes (source_id)
+ CREATE INDEX PA_sourceId ON :SCHEMA.ProteinAttributes (source_id)
;
- CREATE INDEX :SCHEMA.PA_aaSequenceId ON :SCHEMA.ProteinAttributes (aa_sequence_id)
+ CREATE INDEX PA_aaSequenceId ON :SCHEMA.ProteinAttributes (aa_sequence_id)
;
diff --git a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
index a11708b620..8dad2f7481 100644
--- a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.ProtSeq_ix on :SCHEMA._ORG_ABBREVProteinSequence (source_id, project_id)
+ create index ProtSeq_ix on :SCHEMA._ORG_ABBREVProteinSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql b/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql
index 9a55de605a..255767b473 100644
--- a/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql
+++ b/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql
@@ -1,7 +1,7 @@
- CREATE INDEX :SCHEMA.SequenceEnzymeClass_idx1 ON :SCHEMA.SequenceEnzymeClass (group_name, ec_number, description)
+ CREATE INDEX SequenceEnzymeClass_idx1 ON :SCHEMA.SequenceEnzymeClass (group_name, ec_number, description)
;
- CREATE INDEX :SCHEMA.SequenceEnzymeClass_idx2 ON :SCHEMA.SequenceEnzymeClass (full_id, ec_number, description)
+ CREATE INDEX SequenceEnzymeClass_idx2 ON :SCHEMA.SequenceEnzymeClass (full_id, ec_number, description)
;
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
index 5cd5d15432..404e82cc02 100644
--- a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
+++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
@@ -1,8 +1,8 @@
- CREATE INDEX :SCHEMA.SignalP1_ix
+ CREATE INDEX SignalP1_ix
ON :SCHEMA.SignalPeptideDomains (aa_sequence_id)
;
- CREATE INDEX :SCHEMA.SignalP2_ix
+ CREATE INDEX SignalP2_ix
ON :SCHEMA.SignalPeptideDomains (gene_source_id, transcript_source_id, end_max)
;
diff --git a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
index 5acc3e9f9b..7eee68e1d4 100644
--- a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
+++ b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql
@@ -1,6 +1,6 @@
- create index :SCHEMA.tfbs_geneid_idx ON :SCHEMA.TFBSGene (gene_source_id, tfbs_na_feature_id)
+ create index tfbs_geneid_idx ON :SCHEMA.TFBSGene (gene_source_id, tfbs_na_feature_id)
;
- create index :SCHEMA.geneid_tfbs_idx ON :SCHEMA.TFBSGene (tfbs_na_feature_id,gene_source_id)
+ create index geneid_tfbs_idx ON :SCHEMA.TFBSGene (tfbs_na_feature_id,gene_source_id)
;
diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
index 7b29c4aa03..e7005f8319 100644
--- a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
+++ b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.tax_ix
+ create index tax_ix
on :SCHEMA.Taxonomy
(organism, ordernum, taxon_id, parent_id, ncbi_tax_id, name, rank)
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
index 08b54452aa..dd1670b4a3 100644
--- a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
@@ -1,55 +1,55 @@
- CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_sourceId
- ON TranscriptAttributes (source_id)
+ CREATE UNIQUE INDEX TranscriptAttr_sourceId
+ ON :SCHEMA.TranscriptAttributes (source_id)
;
- CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_srcPrj
- ON TranscriptAttributes (source_id, gene_source_id, project_id)
+ CREATE UNIQUE INDEX TranscriptAttr_srcPrj
+ ON :SCHEMA.TranscriptAttributes (source_id, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_genesrc
- ON TranscriptAttributes (gene_source_id, source_id, project_id)
+ CREATE UNIQUE INDEX TranscriptAttr_genesrc
+ ON :SCHEMA.TranscriptAttributes (gene_source_id, source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_exon_ix
- ON TranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id)
+ CREATE UNIQUE INDEX TranscriptAttr_exon_ix
+ ON :SCHEMA.TranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_loc_ix
+ CREATE UNIQUE INDEX TranscriptAttr_loc_ix
ON :SCHEMA.TranscriptAttributes
(na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id,
is_deprecated, source_id, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_feat_ix
+ CREATE UNIQUE INDEX TranscriptAttr_feat_ix
ON :SCHEMA.TranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_geneid_ix
+ CREATE UNIQUE INDEX TranscriptAttr_geneid_ix
ON :SCHEMA.TranscriptAttributes (gene_id, source_id, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.TransAttr_orthoname_ix
+ CREATE UNIQUE INDEX TransAttr_orthoname_ix
ON :SCHEMA.TranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id)
;
- CREATE UNIQUE INDEX :SCHEMA.TransAttr_molwt_ix
+ CREATE UNIQUE INDEX TransAttr_molwt_ix
ON :SCHEMA.TranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id)
;
- CREATE INDEX :SCHEMA.TransAttr_ortholog_ix
+ CREATE INDEX TransAttr_ortholog_ix
ON :SCHEMA.TranscriptAttributes
(source_id, na_sequence_id, gene_start_min, gene_end_max, orthomcl_name, gene_source_id, project_id)
;
- CREATE INDEX :SCHEMA.TransAttr_orgsrc_ix
+ CREATE INDEX TransAttr_orgsrc_ix
ON :SCHEMA.TranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max)
;
- CREATE INDEX :SCHEMA.TransAttr_lwrsrc_ix
+ CREATE INDEX TransAttr_lwrsrc_ix
ON :SCHEMA.TranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id)
;
- CREATE INDEX :SCHEMA.TransAttr_species_ix
+ CREATE INDEX TransAttr_species_ix
ON :SCHEMA.TranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id)
;
@@ -60,11 +60,11 @@
five_prime_utr_length, three_prime_utr_length)
;
- CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_genenaf
+ CREATE UNIQUE INDEX TranscriptAttr_genenaf
ON :SCHEMA.TranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id)
;
- CREATE INDEX :SCHEMA.TransAttr_locsIds_ix
+ CREATE INDEX TransAttr_locsIds_ix
ON :SCHEMA.TranscriptAttributes
(na_sequence_id, start_min, end_max, is_reversed, gene_source_id, source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
index ad1c71a2fc..25e656ced9 100644
--- a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.GCent_loc_ix
+ create index GCent_loc_ix
on :SCHEMA.TranscriptCenDistance (genomic_sequence, centromere_distance)
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
index 2b05b42f44..92f77af43c 100644
--- a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
@@ -1,11 +1,11 @@
- create index :SCHEMA.TranscriptPath_ix
+ create index TranscriptPath_ix
on :SCHEMA.TranscriptPathway
(gene_source_id, source_id, pathway_source_id,
pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway,
ec_number_pathway, pathway_source)
;
- create index :SCHEMA.TranscriptPathSource_ix
+ create index TranscriptPathSource_ix
on :SCHEMA.TranscriptPathway (pathway_source,
gene_source_id, source_id)
;
diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
index 1e1b36617d..98bb3137f9 100644
--- a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
@@ -1,3 +1,3 @@
- create index :SCHEMA.XScriptSeq_ix on :SCHEMA.TranscriptSequence (source_id, project_id)
+ create index XScriptSeq_ix on :SCHEMA.TranscriptSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
index 613aff3e51..964f234217 100644
--- a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
+++ b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
@@ -1,4 +1,4 @@
- create index :SCHEMA.TransDom1_ix
+ create index TransDom1_ix
on :SCHEMA.TransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology)
;
From f7441155e93c35c64c1f47ebd957e27e63987911 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Mon, 19 May 2025 13:57:49 -0400
Subject: [PATCH 024/112] drop table and clean Ks
---
.../{MO => K}/DatasetExampleSourceId.psql | 0
.../{MO => K}/DatasetExampleSourceId_ix.psql | 0
.../{MO => K}/OrganismAbbreviationBlast.psql | 0
.../OrganismAbbreviationBlast_ix.psql | 0
.../{MO => K}/OrganismAttributes.psql | 0
.../{MO => K}/OrganismAttributes_ix.psql | 0
.../lib/psql/webtables/{MO => K}/Profile.psql | 0
.../webtables/{MO => K}/ProfileSamples.psql | 0
.../{MO => K}/ProfileSamples_ix.psql | 0
.../psql/webtables/{MO => K}/ProfileType.psql | 0
.../webtables/{MO => K}/ProfileType_ix.psql | 0
.../psql/webtables/{MO => K}/Profile_ix.psql | 0
.../lib/psql/webtables/MG/OntologyLevels.psql | 6 ++++-
.../psql/webtables/MG/PathwayAttributes.psql | 4 ++-
Model/lib/psql/webtables/MG/PathwayNodes.psql | 26 +++++++++++++++----
.../webtables/MO/EstAlignmentGeneSummary.psql | 4 +--
Model/lib/psql/webtables/MO/GeneId.psql | 2 ++
.../lib/psql/webtables/MO/NameMappingGIJ.psql | 8 +++---
.../psql/webtables/MO/ProteinAttributes.psql | 7 ++---
.../webtables/MO/SequencePieceClosure.psql | 3 ++-
.../webtables/MO/TranscriptAttributes.psql | 4 +--
21 files changed, 45 insertions(+), 19 deletions(-)
rename Model/lib/psql/webtables/{MO => K}/DatasetExampleSourceId.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/DatasetExampleSourceId_ix.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/OrganismAbbreviationBlast.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/OrganismAbbreviationBlast_ix.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/OrganismAttributes.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/OrganismAttributes_ix.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/Profile.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/ProfileSamples.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/ProfileSamples_ix.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/ProfileType.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/ProfileType_ix.psql (100%)
rename Model/lib/psql/webtables/{MO => K}/Profile_ix.psql (100%)
diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql b/Model/lib/psql/webtables/K/DatasetExampleSourceId.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql
rename to Model/lib/psql/webtables/K/DatasetExampleSourceId.psql
diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId_ix.psql b/Model/lib/psql/webtables/K/DatasetExampleSourceId_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/DatasetExampleSourceId_ix.psql
rename to Model/lib/psql/webtables/K/DatasetExampleSourceId_ix.psql
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql b/Model/lib/psql/webtables/K/OrganismAbbreviationBlast.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
rename to Model/lib/psql/webtables/K/OrganismAbbreviationBlast.psql
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast_ix.psql b/Model/lib/psql/webtables/K/OrganismAbbreviationBlast_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/OrganismAbbreviationBlast_ix.psql
rename to Model/lib/psql/webtables/K/OrganismAbbreviationBlast_ix.psql
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/K/OrganismAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/OrganismAttributes.psql
rename to Model/lib/psql/webtables/K/OrganismAttributes.psql
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql b/Model/lib/psql/webtables/K/OrganismAttributes_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
rename to Model/lib/psql/webtables/K/OrganismAttributes_ix.psql
diff --git a/Model/lib/psql/webtables/MO/Profile.psql b/Model/lib/psql/webtables/K/Profile.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/Profile.psql
rename to Model/lib/psql/webtables/K/Profile.psql
diff --git a/Model/lib/psql/webtables/MO/ProfileSamples.psql b/Model/lib/psql/webtables/K/ProfileSamples.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ProfileSamples.psql
rename to Model/lib/psql/webtables/K/ProfileSamples.psql
diff --git a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql b/Model/lib/psql/webtables/K/ProfileSamples_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ProfileSamples_ix.psql
rename to Model/lib/psql/webtables/K/ProfileSamples_ix.psql
diff --git a/Model/lib/psql/webtables/MO/ProfileType.psql b/Model/lib/psql/webtables/K/ProfileType.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ProfileType.psql
rename to Model/lib/psql/webtables/K/ProfileType.psql
diff --git a/Model/lib/psql/webtables/MO/ProfileType_ix.psql b/Model/lib/psql/webtables/K/ProfileType_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ProfileType_ix.psql
rename to Model/lib/psql/webtables/K/ProfileType_ix.psql
diff --git a/Model/lib/psql/webtables/MO/Profile_ix.psql b/Model/lib/psql/webtables/K/Profile_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/Profile_ix.psql
rename to Model/lib/psql/webtables/K/Profile_ix.psql
diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webtables/MG/OntologyLevels.psql
index 47df6e98e2..2a80ec0fb4 100644
--- a/Model/lib/psql/webtables/MG/OntologyLevels.psql
+++ b/Model/lib/psql/webtables/MG/OntologyLevels.psql
@@ -1,10 +1,14 @@
- CREATE UNLOGGED TABLE :SCHEMA.Is_a_links AS
+ DROP TABLE IF EXISTS :SCHEMA.Is_a_links;
+
+ CREATE UNLOGGED TABLE :SCHEMA.Is_a_links AS
SELECT subject_term_id, object_term_id
FROM sres.OntologyRelationship rel, sres.OntologyTerm pred
WHERE rel.predicate_term_id = pred.ontology_term_id
AND pred.name = 'is_a'
;
+ DROP TABLE IF EXISTS :SCHEMA.Roots;
+
CREATE UNLOGGED TABLE :SCHEMA.Roots AS
SELECT object_term_id FROM :SCHEMA.is_a_links
EXCEPT
diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes.psql b/Model/lib/psql/webtables/MG/PathwayAttributes.psql
index e976027524..5c8567fffe 100644
--- a/Model/lib/psql/webtables/MG/PathwayAttributes.psql
+++ b/Model/lib/psql/webtables/MG/PathwayAttributes.psql
@@ -1,4 +1,6 @@
- CREATE TABLE :SCHEMA.PathwayAttributes as
+ drop table if exists :SCHEMA.PathwayAttributes;
+
+ CREATE TABLE :SCHEMA.PathwayAttributes as
SELECT
p.source_id
, p.pathway_id
diff --git a/Model/lib/psql/webtables/MG/PathwayNodes.psql b/Model/lib/psql/webtables/MG/PathwayNodes.psql
index 7b8740ca5b..87a255e6ae 100644
--- a/Model/lib/psql/webtables/MG/PathwayNodes.psql
+++ b/Model/lib/psql/webtables/MG/PathwayNodes.psql
@@ -1,4 +1,6 @@
- CREATE UNLOGGED TABLE :SCHEMA.NodesWithTypes AS
+ DROP TABLE IF EXISTS :SCHEMA.NodesWithTypes;
+
+ CREATE UNLOGGED TABLE :SCHEMA.NodesWithTypes AS
SELECT pn.pathway_id
, CASE WHEN pa.name IS NOT NULL THEN pa.name ELSE pn.display_label END AS display_label
, pa.url
@@ -83,7 +85,9 @@
WHERE ot.name = 'molecular entity'
;
- CREATE UNLOGGED TABLE :SCHEMA.ReactionsWithReversibility AS
+ DROP TABLE IF EXISTS :SCHEMA.ReactionsWithReversibility;
+
+ CREATE UNLOGGED TABLE :SCHEMA.ReactionsWithReversibility AS
SELECT DISTINCT spr.pathway_relationship_id
, tpr.is_reversible
, tpr.reaction_source_id
@@ -94,7 +98,9 @@
AND tpr.reaction_id = prr.pathway_reaction_id
;
- CREATE UNLOGGED TABLE :SCHEMA.EnzymeEdges AS
+ DROP TABLE IF EXISTS :SCHEMA.EnzymeEdges;
+
+ CREATE UNLOGGED TABLE :SCHEMA.EnzymeEdges AS
SELECT DISTINCT nwt.pathway_id AS pathway_id
, nwt.pathway_node_id AS e_id
, nwt.type
@@ -117,7 +123,9 @@
AND rri.reaction_source_id = rro.reaction_source_id
;
- CREATE UNLOGGED TABLE :SCHEMA.ParentNodes AS
+ DROP TABLE IF EXISTS :SCHEMA.ParentNodes;
+
+ CREATE UNLOGGED TABLE :SCHEMA.ParentNodes AS
WITH AllEnzymeEdges AS (
SELECT string_agg(io, ',' ORDER BY io) AS all_edges
, e_id
@@ -141,6 +149,8 @@
WHERE aee.all_edges = pn.all_edges
;
+ DROP TABLE IF EXISTS :SCHEMA.NodesWithParents;
+
CREATE UNLOGGED TABLE :SCHEMA.NodesWithParents AS
SELECT DISTINCT ee.e_id AS pathway_node_id
, pn.parent
@@ -152,6 +162,8 @@
AND ee.e_id = pn.e_id
;
+ DROP TABLE IF EXISTS :SCHEMA.EnzymeReactions;
+
CREATE UNLOGGED TABLE :SCHEMA.EnzymeReactions AS
SELECT DISTINCT pn.PATHWAY_NODE_ID node_id
, pr.SOURCE_ID AS reaction_source_id
@@ -167,6 +179,8 @@
AND pn.PATHWAY_NODE_TYPE_ID = ot.ONTOLOGY_TERM_ID
;
+ DROP TABLE IF EXISTS :SCHEMA.ParentsForEdges;
+
CREATE UNLOGGED TABLE :SCHEMA.ParentsForEdges AS
SELECT ee.e_id
, ee.m1_id
@@ -179,7 +193,9 @@
WHERE ee.e_id = np.pathway_node_id
;
- CREATE TABLE :SCHEMA.PathwayEdges AS
+ DROP TABLE IF EXISTS :SCHEMA.PathwayEdges;
+
+ CREATE UNLOGGED TABLE :SCHEMA.PathwayEdges AS
SELECT pa.source_id
, pa.pathway_source
, rel.*
diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
index 4dbff1eca8..dc2936487b 100644
--- a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
+++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
@@ -1,4 +1,4 @@
-
+ drop table if exists :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp;
CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp AS
SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession,
@@ -24,7 +24,7 @@
;
-
+ drop table if exists :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp;
CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp AS
SELECT * from :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp WHERE 1=0 UNION /* define datatype for null column */
diff --git a/Model/lib/psql/webtables/MO/GeneId.psql b/Model/lib/psql/webtables/MO/GeneId.psql
index 7feaf7ebc2..75b1bfdab5 100644
--- a/Model/lib/psql/webtables/MO/GeneId.psql
+++ b/Model/lib/psql/webtables/MO/GeneId.psql
@@ -1,3 +1,5 @@
+drop table if exists :SCHEMA.:ORG_ABBREVGeneFeatureTmp;
+
create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureTmp as
(select gf.na_feature_id
, gf.na_sequence_id
diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ.psql
index edc7d78f8e..678512b373 100644
--- a/Model/lib/psql/webtables/MO/NameMappingGIJ.psql
+++ b/Model/lib/psql/webtables/MO/NameMappingGIJ.psql
@@ -1,6 +1,6 @@
-
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVJunExpGIJtmp;
- CREATE UNLOGGED TABLE JunExpGIJtmp AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVJunExpGIJtmp AS
WITH ij AS (
SELECT pj.output_pan_id as junctions_pan_id, p.output_pan_id as expression_pan_id, avg(nafe.value) as avg_value,pan.name as exp_name,
regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name
@@ -47,9 +47,9 @@
;
-
+ drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
- CREATE UNLOGGED TABLE MappingStatsGIJtmp (
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
junctions_pan_id,
read_length,
mapped_reads,
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webtables/MO/ProteinAttributes.psql
index 645a1edfae..a5cfad9f82 100644
--- a/Model/lib/psql/webtables/MO/ProteinAttributes.psql
+++ b/Model/lib/psql/webtables/MO/ProteinAttributes.psql
@@ -1,4 +1,4 @@
-
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVGoTermList_tmp;
CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGoTermList_tmp AS
SELECT aa_sequence_id, ontology, source,
@@ -15,7 +15,7 @@
;
-
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp;
CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp AS
SELECT DISTINCT gts.aa_sequence_id,
@@ -85,7 +85,7 @@
;
-
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp;
CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp AS
SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers
@@ -101,6 +101,7 @@
;
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp;
--TODO: these rows will not exist in org specific land
-- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp AS
diff --git a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql
index a7fc7c39b4..6da04c90fb 100644
--- a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql
+++ b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql
@@ -1,4 +1,5 @@
- CREATE TABLE :ORG_ABBREVSequencePieceClosure AS
+
+ CREATE TABLE :ORG_ABBREVSequencePieceClosure AS
SELECT sp.sequence_piece_id,
sp.virtual_na_sequence_id,
sp.piece_na_sequence_id,
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
index e82dea4d5b..cc70739339 100644
--- a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
+++ b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql
@@ -1,6 +1,6 @@
-\
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp;
- CREATE table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp AS
+ CREATE UNLOGGED table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp AS
select na_feature_id,
substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id,
substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal
From 02d596663486e2122959167e24ac27c9047ee145 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Mon, 19 May 2025 18:32:08 -0400
Subject: [PATCH 025/112] rename webready folders
---
Model/lib/psql/{webtables => webready}/UK/AssociatedDataset.psql | 0
Model/lib/psql/{webtables => webready}/UK/DomainAssignment.psql | 0
Model/lib/psql/{webtables => webready}/UK/EdaGeneGraph.psql | 0
Model/lib/psql/{webtables => webready}/UK/PANResults.psql | 0
Model/lib/psql/{webtables => webready}/UK/ProjectTaxon.psql | 0
Model/lib/psql/{webtables => webready}/UK/TypeAheadCounts.psql | 0
.../{webtables/UK => webready/comparative}/AlphaFoldGenes.psql | 0
.../UK => webready/comparative}/GroupPhylogeneticProfile.psql | 0
.../UK => webready/comparative}/OrthologousTranscripts.psql | 0
.../{webtables/UK => webready/comparative}/PhyleticPattern.psql | 0
.../{webtables/MG => webready/global}/CompoundAttributes.psql | 0
.../{webtables/MG => webready/global}/CompoundAttributes_ix.psql | 0
Model/lib/psql/{webtables/MG => webready/global}/CompoundId.psql | 0
.../lib/psql/{webtables/MG => webready/global}/CompoundId_ix.psql | 0
.../{webtables/MG => webready/global}/CompoundProperties.psql | 0
.../{webtables/MG => webready/global}/CompoundTypeAheads.psql | 0
.../{webtables/MG => webready/global}/GroupDomainAttribute.psql | 0
.../MG => webready/global}/GroupDomainAttribute_ix.psql | 0
.../psql/{webtables/MG => webready/global}/OntologyLevels.psql | 0
.../psql/{webtables/MG => webready/global}/OntologyLevels_ix.psql | 0
.../psql/{webtables/MG => webready/global}/PathwayAttributes.psql | 0
.../{webtables/MG => webready/global}/PathwayAttributes_ix.psql | 0
.../psql/{webtables/MG => webready/global}/PathwayCompounds.psql | 0
.../{webtables/MG => webready/global}/PathwayCompounds_ix.psql | 0
.../lib/psql/{webtables/MG => webready/global}/PathwayNodes.psql | 0
.../psql/{webtables/MG => webready/global}/PathwayReactions.psql | 0
.../{webtables/MG => webready/global}/PathwayReactions_ix.psql | 0
.../{webtables/UK => webready/global}/SequenceAttributes.psql | 0
.../{webtables/K => webready/keep}/DatasetExampleSourceId.psql | 0
.../{webtables/K => webready/keep}/DatasetExampleSourceId_ix.psql | 0
.../{webtables/K => webready/keep}/OrganismAbbreviationBlast.psql | 0
.../K => webready/keep}/OrganismAbbreviationBlast_ix.psql | 0
.../psql/{webtables/K => webready/keep}/OrganismAttributes.psql | 0
.../{webtables/K => webready/keep}/OrganismAttributes_ix.psql | 0
Model/lib/psql/{webtables/K => webready/keep}/Profile.psql | 0
Model/lib/psql/{webtables/K => webready/keep}/ProfileSamples.psql | 0
.../psql/{webtables/K => webready/keep}/ProfileSamples_ix.psql | 0
Model/lib/psql/{webtables/K => webready/keep}/ProfileType.psql | 0
Model/lib/psql/{webtables/K => webready/keep}/ProfileType_ix.psql | 0
Model/lib/psql/{webtables/K => webready/keep}/Profile_ix.psql | 0
.../MO => webready/orgSpecific}/ChIPchipTranscript.psql | 0
.../MO => webready/orgSpecific}/ChIPchipTranscript_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/ChrCopyNumbers.psql | 0
.../{webtables/MO => webready/orgSpecific}/ChrCopyNumbers_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/CodingSequence.psql | 0
.../{webtables/MO => webready/orgSpecific}/CodingSequence_ix.psql | 0
.../lib/psql/{webtables/MO => webready/orgSpecific}/EqtlSpan.psql | 0
.../psql/{webtables/MO => webready/orgSpecific}/EqtlSpan_ix.psql | 0
.../MO => webready/orgSpecific}/EstAlignmentGeneSummary.psql | 0
.../MO => webready/orgSpecific}/EstAlignmentGeneSummary_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/EstAttributes.psql | 0
.../{webtables/MO => webready/orgSpecific}/EstAttributes_ix.psql | 0
.../psql/{webtables/MO => webready/orgSpecific}/EstSequence.psql | 0
.../{webtables/MO => webready/orgSpecific}/EstSequence_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneAttributes.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneAttributes_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneCopyNumbers.psql | 0
.../MO => webready/orgSpecific}/GeneCopyNumbers_ix.psql | 0
.../psql/{webtables/MO => webready/orgSpecific}/GeneGoTable.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneGoTable_ix.psql | 0
.../psql/{webtables/MO => webready/orgSpecific}/GeneGoTerms.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneGoTerms_ix.psql | 0
Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneId.psql | 0
.../psql/{webtables/MO => webready/orgSpecific}/GeneId_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneIntJuncStats.psql | 0
.../MO => webready/orgSpecific}/GeneIntJuncStats_ix.psql | 0
.../MO => webready/orgSpecific}/GeneIntronJunction.psql | 0
.../MO => webready/orgSpecific}/GeneIntronJunction_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneLocations.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneLocations_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneMaxIntronGIJ.psql | 0
.../MO => webready/orgSpecific}/GeneMaxIntronGIJ_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneModelDump.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneModelDump_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/GeneSummaryFilter.psql | 0
.../MO => webready/orgSpecific}/GeneSummaryFilter_ix.psql | 0
.../MO => webready/orgSpecific}/GenomicSeqAttributes.psql | 0
.../MO => webready/orgSpecific}/GenomicSeqAttributes_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/GenomicSequenceId.psql | 0
.../MO => webready/orgSpecific}/GenomicSequenceId_ix.psql | 0
.../MO => webready/orgSpecific}/GenomicSequenceSequence.psql | 0
.../MO => webready/orgSpecific}/GenomicSequenceSequence_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/GoTermSummary.psql | 0
.../{webtables/MO => webready/orgSpecific}/GoTermSummary_ix.psql | 0
.../MO => webready/orgSpecific}/IntronSupportLevel.psql | 0
.../MO => webready/orgSpecific}/IntronSupportLevel_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/IntronUtrCoords.psql | 0
.../MO => webready/orgSpecific}/IntronUtrCoords_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/NameMappingGIJ.psql | 0
.../{webtables/MO => webready/orgSpecific}/NameMappingGIJ_ix.psql | 0
.../MO => webready/orgSpecific}/OrganismAbbreviation.psql | 0
.../MO => webready/orgSpecific}/OrganismAbbreviation_ix.psql | 0
.../MO => webready/orgSpecific}/OrganismSelectTaxonRank.psql | 0
.../MO => webready/orgSpecific}/OrganismSelectTaxonRank_ix.psql | 0
.../psql/{webtables/MO => webready/orgSpecific}/PANExtDbRls.psql | 0
Model/lib/psql/{webtables/MO => webready/orgSpecific}/PANIO.psql | 0
.../lib/psql/{webtables/MO => webready/orgSpecific}/PANIO_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/PathwayNodeGene.psql | 0
.../MO => webready/orgSpecific}/PathwayNodeGene_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/PathwaysGeneTable.psql | 0
.../MO => webready/orgSpecific}/PathwaysGeneTable_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/PdbSimilarity.psql | 0
.../{webtables/MO => webready/orgSpecific}/PdbSimilarity_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/ProteinAttributes.psql | 0
.../MO => webready/orgSpecific}/ProteinAttributes_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/ProteinSequence.psql | 0
.../MO => webready/orgSpecific}/ProteinSequence_ix.psql | 0
.../psql/{webtables/MO => webready/orgSpecific}/RnaSeqStats.psql | 0
.../{webtables/MO => webready/orgSpecific}/RnaSeqStats_ix.psql | 0
.../MO => webready/orgSpecific}/SequenceEnzymeClass.psql | 0
.../MO => webready/orgSpecific}/SequenceEnzymeClass_ix.psql | 0
.../MO => webready/orgSpecific}/SequencePieceClosure.psql | 0
.../MO => webready/orgSpecific}/SignalPeptideDomains.psql | 0
.../MO => webready/orgSpecific}/SignalPeptideDomains_ix.psql | 0
.../lib/psql/{webtables/MO => webready/orgSpecific}/TFBSGene.psql | 0
.../psql/{webtables/MO => webready/orgSpecific}/TFBSGene_ix.psql | 0
.../psql/{webtables/MO => webready/orgSpecific}/TaxonSpecies.psql | 0
.../{webtables/MO => webready/orgSpecific}/TaxonSpecies_ix.psql | 0
.../lib/psql/{webtables/MO => webready/orgSpecific}/Taxonomy.psql | 0
.../psql/{webtables/MO => webready/orgSpecific}/Taxonomy_ix.psql | 0
.../MO => webready/orgSpecific}/TranscriptAttributes.psql | 0
.../MO => webready/orgSpecific}/TranscriptAttributes_ix.psql | 0
.../MO => webready/orgSpecific}/TranscriptCenDistance.psql | 0
.../MO => webready/orgSpecific}/TranscriptCenDistance_ix.psql | 0
.../{webtables/MO => webready/orgSpecific}/TranscriptPathway.psql | 0
.../MO => webready/orgSpecific}/TranscriptPathway_ix.psql | 0
.../MO => webready/orgSpecific}/TranscriptSequence.psql | 0
.../MO => webready/orgSpecific}/TranscriptSequence_ix.psql | 0
.../MO => webready/orgSpecific}/TransmembraneDomains.psql | 0
.../MO => webready/orgSpecific}/TransmembraneDomains_ix.psql | 0
130 files changed, 0 insertions(+), 0 deletions(-)
rename Model/lib/psql/{webtables => webready}/UK/AssociatedDataset.psql (100%)
rename Model/lib/psql/{webtables => webready}/UK/DomainAssignment.psql (100%)
rename Model/lib/psql/{webtables => webready}/UK/EdaGeneGraph.psql (100%)
rename Model/lib/psql/{webtables => webready}/UK/PANResults.psql (100%)
rename Model/lib/psql/{webtables => webready}/UK/ProjectTaxon.psql (100%)
rename Model/lib/psql/{webtables => webready}/UK/TypeAheadCounts.psql (100%)
rename Model/lib/psql/{webtables/UK => webready/comparative}/AlphaFoldGenes.psql (100%)
rename Model/lib/psql/{webtables/UK => webready/comparative}/GroupPhylogeneticProfile.psql (100%)
rename Model/lib/psql/{webtables/UK => webready/comparative}/OrthologousTranscripts.psql (100%)
rename Model/lib/psql/{webtables/UK => webready/comparative}/PhyleticPattern.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/CompoundAttributes.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/CompoundAttributes_ix.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/CompoundId.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/CompoundId_ix.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/CompoundProperties.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/CompoundTypeAheads.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/GroupDomainAttribute.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/GroupDomainAttribute_ix.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/OntologyLevels.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/OntologyLevels_ix.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/PathwayAttributes.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/PathwayAttributes_ix.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/PathwayCompounds.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/PathwayCompounds_ix.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/PathwayNodes.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/PathwayReactions.psql (100%)
rename Model/lib/psql/{webtables/MG => webready/global}/PathwayReactions_ix.psql (100%)
rename Model/lib/psql/{webtables/UK => webready/global}/SequenceAttributes.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/DatasetExampleSourceId.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/DatasetExampleSourceId_ix.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/OrganismAbbreviationBlast.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/OrganismAbbreviationBlast_ix.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/OrganismAttributes.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/OrganismAttributes_ix.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/Profile.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/ProfileSamples.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/ProfileSamples_ix.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/ProfileType.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/ProfileType_ix.psql (100%)
rename Model/lib/psql/{webtables/K => webready/keep}/Profile_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ChIPchipTranscript.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ChIPchipTranscript_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ChrCopyNumbers.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ChrCopyNumbers_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/CodingSequence.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/CodingSequence_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EqtlSpan.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EqtlSpan_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstAlignmentGeneSummary.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstAlignmentGeneSummary_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstAttributes.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstAttributes_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstSequence.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstSequence_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneAttributes.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneAttributes_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneCopyNumbers.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneCopyNumbers_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneGoTable.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneGoTable_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneGoTerms.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneGoTerms_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneId.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneId_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneIntJuncStats.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneIntJuncStats_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneIntronJunction.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneIntronJunction_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneLocations.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneLocations_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneMaxIntronGIJ.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneMaxIntronGIJ_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneModelDump.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneModelDump_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneSummaryFilter.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneSummaryFilter_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSeqAttributes.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSeqAttributes_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSequenceId.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSequenceId_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSequenceSequence.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSequenceSequence_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GoTermSummary.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GoTermSummary_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/IntronSupportLevel.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/IntronSupportLevel_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/IntronUtrCoords.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/IntronUtrCoords_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/NameMappingGIJ.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/NameMappingGIJ_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/OrganismAbbreviation.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/OrganismAbbreviation_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/OrganismSelectTaxonRank.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/OrganismSelectTaxonRank_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PANExtDbRls.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PANIO.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PANIO_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PathwayNodeGene.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PathwayNodeGene_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PathwaysGeneTable.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PathwaysGeneTable_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PdbSimilarity.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PdbSimilarity_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ProteinAttributes.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ProteinAttributes_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ProteinSequence.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ProteinSequence_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/RnaSeqStats.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/RnaSeqStats_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/SequenceEnzymeClass.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/SequenceEnzymeClass_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/SequencePieceClosure.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/SignalPeptideDomains.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/SignalPeptideDomains_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TFBSGene.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TFBSGene_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TaxonSpecies.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TaxonSpecies_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/Taxonomy.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/Taxonomy_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptAttributes.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptAttributes_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptCenDistance.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptCenDistance_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptPathway.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptPathway_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptSequence.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptSequence_ix.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TransmembraneDomains.psql (100%)
rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TransmembraneDomains_ix.psql (100%)
diff --git a/Model/lib/psql/webtables/UK/AssociatedDataset.psql b/Model/lib/psql/webready/UK/AssociatedDataset.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/AssociatedDataset.psql
rename to Model/lib/psql/webready/UK/AssociatedDataset.psql
diff --git a/Model/lib/psql/webtables/UK/DomainAssignment.psql b/Model/lib/psql/webready/UK/DomainAssignment.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/DomainAssignment.psql
rename to Model/lib/psql/webready/UK/DomainAssignment.psql
diff --git a/Model/lib/psql/webtables/UK/EdaGeneGraph.psql b/Model/lib/psql/webready/UK/EdaGeneGraph.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/EdaGeneGraph.psql
rename to Model/lib/psql/webready/UK/EdaGeneGraph.psql
diff --git a/Model/lib/psql/webtables/UK/PANResults.psql b/Model/lib/psql/webready/UK/PANResults.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/PANResults.psql
rename to Model/lib/psql/webready/UK/PANResults.psql
diff --git a/Model/lib/psql/webtables/UK/ProjectTaxon.psql b/Model/lib/psql/webready/UK/ProjectTaxon.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/ProjectTaxon.psql
rename to Model/lib/psql/webready/UK/ProjectTaxon.psql
diff --git a/Model/lib/psql/webtables/UK/TypeAheadCounts.psql b/Model/lib/psql/webready/UK/TypeAheadCounts.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/TypeAheadCounts.psql
rename to Model/lib/psql/webready/UK/TypeAheadCounts.psql
diff --git a/Model/lib/psql/webtables/UK/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/AlphaFoldGenes.psql
rename to Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
diff --git a/Model/lib/psql/webtables/UK/GroupPhylogeneticProfile.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/GroupPhylogeneticProfile.psql
rename to Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
diff --git a/Model/lib/psql/webtables/UK/OrthologousTranscripts.psql b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/OrthologousTranscripts.psql
rename to Model/lib/psql/webready/comparative/OrthologousTranscripts.psql
diff --git a/Model/lib/psql/webtables/UK/PhyleticPattern.psql b/Model/lib/psql/webready/comparative/PhyleticPattern.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/PhyleticPattern.psql
rename to Model/lib/psql/webready/comparative/PhyleticPattern.psql
diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes.psql b/Model/lib/psql/webready/global/CompoundAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/CompoundAttributes.psql
rename to Model/lib/psql/webready/global/CompoundAttributes.psql
diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql b/Model/lib/psql/webready/global/CompoundAttributes_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql
rename to Model/lib/psql/webready/global/CompoundAttributes_ix.psql
diff --git a/Model/lib/psql/webtables/MG/CompoundId.psql b/Model/lib/psql/webready/global/CompoundId.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/CompoundId.psql
rename to Model/lib/psql/webready/global/CompoundId.psql
diff --git a/Model/lib/psql/webtables/MG/CompoundId_ix.psql b/Model/lib/psql/webready/global/CompoundId_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/CompoundId_ix.psql
rename to Model/lib/psql/webready/global/CompoundId_ix.psql
diff --git a/Model/lib/psql/webtables/MG/CompoundProperties.psql b/Model/lib/psql/webready/global/CompoundProperties.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/CompoundProperties.psql
rename to Model/lib/psql/webready/global/CompoundProperties.psql
diff --git a/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql b/Model/lib/psql/webready/global/CompoundTypeAheads.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/CompoundTypeAheads.psql
rename to Model/lib/psql/webready/global/CompoundTypeAheads.psql
diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql b/Model/lib/psql/webready/global/GroupDomainAttribute.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
rename to Model/lib/psql/webready/global/GroupDomainAttribute.psql
diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql b/Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql
rename to Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql
diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webready/global/OntologyLevels.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/OntologyLevels.psql
rename to Model/lib/psql/webready/global/OntologyLevels.psql
diff --git a/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql b/Model/lib/psql/webready/global/OntologyLevels_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/OntologyLevels_ix.psql
rename to Model/lib/psql/webready/global/OntologyLevels_ix.psql
diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes.psql b/Model/lib/psql/webready/global/PathwayAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/PathwayAttributes.psql
rename to Model/lib/psql/webready/global/PathwayAttributes.psql
diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql b/Model/lib/psql/webready/global/PathwayAttributes_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql
rename to Model/lib/psql/webready/global/PathwayAttributes_ix.psql
diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds.psql b/Model/lib/psql/webready/global/PathwayCompounds.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/PathwayCompounds.psql
rename to Model/lib/psql/webready/global/PathwayCompounds.psql
diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql b/Model/lib/psql/webready/global/PathwayCompounds_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql
rename to Model/lib/psql/webready/global/PathwayCompounds_ix.psql
diff --git a/Model/lib/psql/webtables/MG/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/PathwayNodes.psql
rename to Model/lib/psql/webready/global/PathwayNodes.psql
diff --git a/Model/lib/psql/webtables/MG/PathwayReactions.psql b/Model/lib/psql/webready/global/PathwayReactions.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/PathwayReactions.psql
rename to Model/lib/psql/webready/global/PathwayReactions.psql
diff --git a/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql b/Model/lib/psql/webready/global/PathwayReactions_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MG/PathwayReactions_ix.psql
rename to Model/lib/psql/webready/global/PathwayReactions_ix.psql
diff --git a/Model/lib/psql/webtables/UK/SequenceAttributes.psql b/Model/lib/psql/webready/global/SequenceAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/UK/SequenceAttributes.psql
rename to Model/lib/psql/webready/global/SequenceAttributes.psql
diff --git a/Model/lib/psql/webtables/K/DatasetExampleSourceId.psql b/Model/lib/psql/webready/keep/DatasetExampleSourceId.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/DatasetExampleSourceId.psql
rename to Model/lib/psql/webready/keep/DatasetExampleSourceId.psql
diff --git a/Model/lib/psql/webtables/K/DatasetExampleSourceId_ix.psql b/Model/lib/psql/webready/keep/DatasetExampleSourceId_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/DatasetExampleSourceId_ix.psql
rename to Model/lib/psql/webready/keep/DatasetExampleSourceId_ix.psql
diff --git a/Model/lib/psql/webtables/K/OrganismAbbreviationBlast.psql b/Model/lib/psql/webready/keep/OrganismAbbreviationBlast.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/OrganismAbbreviationBlast.psql
rename to Model/lib/psql/webready/keep/OrganismAbbreviationBlast.psql
diff --git a/Model/lib/psql/webtables/K/OrganismAbbreviationBlast_ix.psql b/Model/lib/psql/webready/keep/OrganismAbbreviationBlast_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/OrganismAbbreviationBlast_ix.psql
rename to Model/lib/psql/webready/keep/OrganismAbbreviationBlast_ix.psql
diff --git a/Model/lib/psql/webtables/K/OrganismAttributes.psql b/Model/lib/psql/webready/keep/OrganismAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/OrganismAttributes.psql
rename to Model/lib/psql/webready/keep/OrganismAttributes.psql
diff --git a/Model/lib/psql/webtables/K/OrganismAttributes_ix.psql b/Model/lib/psql/webready/keep/OrganismAttributes_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/OrganismAttributes_ix.psql
rename to Model/lib/psql/webready/keep/OrganismAttributes_ix.psql
diff --git a/Model/lib/psql/webtables/K/Profile.psql b/Model/lib/psql/webready/keep/Profile.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/Profile.psql
rename to Model/lib/psql/webready/keep/Profile.psql
diff --git a/Model/lib/psql/webtables/K/ProfileSamples.psql b/Model/lib/psql/webready/keep/ProfileSamples.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/ProfileSamples.psql
rename to Model/lib/psql/webready/keep/ProfileSamples.psql
diff --git a/Model/lib/psql/webtables/K/ProfileSamples_ix.psql b/Model/lib/psql/webready/keep/ProfileSamples_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/ProfileSamples_ix.psql
rename to Model/lib/psql/webready/keep/ProfileSamples_ix.psql
diff --git a/Model/lib/psql/webtables/K/ProfileType.psql b/Model/lib/psql/webready/keep/ProfileType.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/ProfileType.psql
rename to Model/lib/psql/webready/keep/ProfileType.psql
diff --git a/Model/lib/psql/webtables/K/ProfileType_ix.psql b/Model/lib/psql/webready/keep/ProfileType_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/ProfileType_ix.psql
rename to Model/lib/psql/webready/keep/ProfileType_ix.psql
diff --git a/Model/lib/psql/webtables/K/Profile_ix.psql b/Model/lib/psql/webready/keep/Profile_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/K/Profile_ix.psql
rename to Model/lib/psql/webready/keep/Profile_ix.psql
diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ChIPchipTranscript.psql
rename to Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql
diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql
rename to Model/lib/psql/webready/orgSpecific/ChIPchipTranscript_ix.psql
diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ChrCopyNumbers.psql
rename to Model/lib/psql/webready/orgSpecific/ChrCopyNumbers.psql
diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql
rename to Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql
diff --git a/Model/lib/psql/webtables/MO/CodingSequence.psql b/Model/lib/psql/webready/orgSpecific/CodingSequence.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/CodingSequence.psql
rename to Model/lib/psql/webready/orgSpecific/CodingSequence.psql
diff --git a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/CodingSequence_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/CodingSequence_ix.psql
rename to Model/lib/psql/webready/orgSpecific/CodingSequence_ix.psql
diff --git a/Model/lib/psql/webtables/MO/EqtlSpan.psql b/Model/lib/psql/webready/orgSpecific/EqtlSpan.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/EqtlSpan.psql
rename to Model/lib/psql/webready/orgSpecific/EqtlSpan.psql
diff --git a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql b/Model/lib/psql/webready/orgSpecific/EqtlSpan_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/EqtlSpan_ix.psql
rename to Model/lib/psql/webready/orgSpecific/EqtlSpan_ix.psql
diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql
rename to Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql
diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql
rename to Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary_ix.psql
diff --git a/Model/lib/psql/webtables/MO/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/EstAttributes.psql
rename to Model/lib/psql/webready/orgSpecific/EstAttributes.psql
diff --git a/Model/lib/psql/webtables/MO/EstAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/EstAttributes_ix.psql
rename to Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql
diff --git a/Model/lib/psql/webtables/MO/EstSequence.psql b/Model/lib/psql/webready/orgSpecific/EstSequence.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/EstSequence.psql
rename to Model/lib/psql/webready/orgSpecific/EstSequence.psql
diff --git a/Model/lib/psql/webtables/MO/EstSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/EstSequence_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/EstSequence_ix.psql
rename to Model/lib/psql/webready/orgSpecific/EstSequence_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneAttributes.psql
rename to Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
diff --git a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneAttributes_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql b/Model/lib/psql/webready/orgSpecific/GeneCopyNumbers.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneCopyNumbers.psql
rename to Model/lib/psql/webready/orgSpecific/GeneCopyNumbers.psql
diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneCopyNumbers_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneCopyNumbers_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneGoTable.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTable.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneGoTable.psql
rename to Model/lib/psql/webready/orgSpecific/GeneGoTable.psql
diff --git a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneGoTable_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTerms.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneGoTerms.psql
rename to Model/lib/psql/webready/orgSpecific/GeneGoTerms.psql
diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTerms_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneGoTerms_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneId.psql b/Model/lib/psql/webready/orgSpecific/GeneId.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneId.psql
rename to Model/lib/psql/webready/orgSpecific/GeneId.psql
diff --git a/Model/lib/psql/webtables/MO/GeneId_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneId_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneId_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats.psql b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneIntJuncStats.psql
rename to Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql
diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneIntronJunction.psql
rename to Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql
diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneLocations.psql b/Model/lib/psql/webready/orgSpecific/GeneLocations.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneLocations.psql
rename to Model/lib/psql/webready/orgSpecific/GeneLocations.psql
diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneLocations_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneLocations_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneLocations_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql
rename to Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql
diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneModelDump.psql b/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneModelDump.psql
rename to Model/lib/psql/webready/orgSpecific/GeneModelDump.psql
diff --git a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql b/Model/lib/psql/webready/orgSpecific/GeneSummaryFilter.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneSummaryFilter.psql
rename to Model/lib/psql/webready/orgSpecific/GeneSummaryFilter.psql
diff --git a/Model/lib/psql/webtables/MO/GeneSummaryFilter_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneSummaryFilter_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GeneSummaryFilter_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GeneSummaryFilter_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql
rename to Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceId.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GenomicSequenceId.psql
rename to Model/lib/psql/webready/orgSpecific/GenomicSequenceId.psql
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceId_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GenomicSequenceId_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
rename to Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence.psql
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql
diff --git a/Model/lib/psql/webtables/MO/GoTermSummary.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GoTermSummary.psql
rename to Model/lib/psql/webready/orgSpecific/GoTermSummary.psql
diff --git a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
rename to Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql
diff --git a/Model/lib/psql/webtables/MO/IntronSupportLevel.psql b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/IntronSupportLevel.psql
rename to Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql
diff --git a/Model/lib/psql/webtables/MO/IntronSupportLevel_ix.psql b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/IntronSupportLevel_ix.psql
rename to Model/lib/psql/webready/orgSpecific/IntronSupportLevel_ix.psql
diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords.psql b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/IntronUtrCoords.psql
rename to Model/lib/psql/webready/orgSpecific/IntronUtrCoords.psql
diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
rename to Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql
diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/NameMappingGIJ.psql
rename to Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
rename to Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql b/Model/lib/psql/webready/orgSpecific/OrganismAbbreviation.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
rename to Model/lib/psql/webready/orgSpecific/OrganismAbbreviation.psql
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation_ix.psql b/Model/lib/psql/webready/orgSpecific/OrganismAbbreviation_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/OrganismAbbreviation_ix.psql
rename to Model/lib/psql/webready/orgSpecific/OrganismAbbreviation_ix.psql
diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql b/Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
rename to Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank.psql
diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank_ix.psql b/Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/OrganismSelectTaxonRank_ix.psql
rename to Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank_ix.psql
diff --git a/Model/lib/psql/webtables/MO/PANExtDbRls.psql b/Model/lib/psql/webready/orgSpecific/PANExtDbRls.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/PANExtDbRls.psql
rename to Model/lib/psql/webready/orgSpecific/PANExtDbRls.psql
diff --git a/Model/lib/psql/webtables/MO/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/PANIO.psql
rename to Model/lib/psql/webready/orgSpecific/PANIO.psql
diff --git a/Model/lib/psql/webtables/MO/PANIO_ix.psql b/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/PANIO_ix.psql
rename to Model/lib/psql/webready/orgSpecific/PANIO_ix.psql
diff --git a/Model/lib/psql/webtables/MO/PathwayNodeGene.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/PathwayNodeGene.psql
rename to Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
diff --git a/Model/lib/psql/webtables/MO/PathwayNodeGene_ix.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/PathwayNodeGene_ix.psql
rename to Model/lib/psql/webready/orgSpecific/PathwayNodeGene_ix.psql
diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/PathwaysGeneTable.psql
rename to Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
rename to Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql
diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity.psql b/Model/lib/psql/webready/orgSpecific/PdbSimilarity.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/PdbSimilarity.psql
rename to Model/lib/psql/webready/orgSpecific/PdbSimilarity.psql
diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity_ix.psql b/Model/lib/psql/webready/orgSpecific/PdbSimilarity_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/PdbSimilarity_ix.psql
rename to Model/lib/psql/webready/orgSpecific/PdbSimilarity_ix.psql
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ProteinAttributes.psql
rename to Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql
rename to Model/lib/psql/webready/orgSpecific/ProteinAttributes_ix.psql
diff --git a/Model/lib/psql/webtables/MO/ProteinSequence.psql b/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ProteinSequence.psql
rename to Model/lib/psql/webready/orgSpecific/ProteinSequence.psql
diff --git a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/ProteinSequence_ix.psql
rename to Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql
diff --git a/Model/lib/psql/webtables/MO/RnaSeqStats.psql b/Model/lib/psql/webready/orgSpecific/RnaSeqStats.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/RnaSeqStats.psql
rename to Model/lib/psql/webready/orgSpecific/RnaSeqStats.psql
diff --git a/Model/lib/psql/webtables/MO/RnaSeqStats_ix.psql b/Model/lib/psql/webready/orgSpecific/RnaSeqStats_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/RnaSeqStats_ix.psql
rename to Model/lib/psql/webready/orgSpecific/RnaSeqStats_ix.psql
diff --git a/Model/lib/psql/webtables/MO/SequenceEnzymeClass.psql b/Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/SequenceEnzymeClass.psql
rename to Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass.psql
diff --git a/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql b/Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql
rename to Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass_ix.psql
diff --git a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql b/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/SequencePieceClosure.psql
rename to Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql b/Model/lib/psql/webready/orgSpecific/SignalPeptideDomains.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/SignalPeptideDomains.psql
rename to Model/lib/psql/webready/orgSpecific/SignalPeptideDomains.psql
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webready/orgSpecific/SignalPeptideDomains_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
rename to Model/lib/psql/webready/orgSpecific/SignalPeptideDomains_ix.psql
diff --git a/Model/lib/psql/webtables/MO/TFBSGene.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TFBSGene.psql
rename to Model/lib/psql/webready/orgSpecific/TFBSGene.psql
diff --git a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TFBSGene_ix.psql
rename to Model/lib/psql/webready/orgSpecific/TFBSGene_ix.psql
diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies.psql b/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TaxonSpecies.psql
rename to Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql
diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies_ix.psql b/Model/lib/psql/webready/orgSpecific/TaxonSpecies_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TaxonSpecies_ix.psql
rename to Model/lib/psql/webready/orgSpecific/TaxonSpecies_ix.psql
diff --git a/Model/lib/psql/webtables/MO/Taxonomy.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/Taxonomy.psql
rename to Model/lib/psql/webready/orgSpecific/Taxonomy.psql
diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/Taxonomy_ix.psql
rename to Model/lib/psql/webready/orgSpecific/Taxonomy_ix.psql
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TranscriptAttributes.psql
rename to Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql
rename to Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql
diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql b/Model/lib/psql/webready/orgSpecific/TranscriptCenDistance.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TranscriptCenDistance.psql
rename to Model/lib/psql/webready/orgSpecific/TranscriptCenDistance.psql
diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptCenDistance_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
rename to Model/lib/psql/webready/orgSpecific/TranscriptCenDistance_ix.psql
diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TranscriptPathway.psql
rename to Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql
rename to Model/lib/psql/webready/orgSpecific/TranscriptPathway_ix.psql
diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence.psql b/Model/lib/psql/webready/orgSpecific/TranscriptSequence.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TranscriptSequence.psql
rename to Model/lib/psql/webready/orgSpecific/TranscriptSequence.psql
diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptSequence_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql
rename to Model/lib/psql/webready/orgSpecific/TranscriptSequence_ix.psql
diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TransmembraneDomains.psql
rename to Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql
diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains_ix.psql
similarity index 100%
rename from Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql
rename to Model/lib/psql/webready/orgSpecific/TransmembraneDomains_ix.psql
From 1b021a547242d6c7091fc2713917250c75b8a8c2 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Mon, 19 May 2025 18:32:42 -0400
Subject: [PATCH 026/112] rename webready folders
---
Model/lib/psql/webready/{UK => unknown}/AssociatedDataset.psql | 0
Model/lib/psql/webready/{UK => unknown}/DomainAssignment.psql | 0
Model/lib/psql/webready/{UK => unknown}/EdaGeneGraph.psql | 0
Model/lib/psql/webready/{UK => unknown}/PANResults.psql | 0
Model/lib/psql/webready/{UK => unknown}/ProjectTaxon.psql | 0
Model/lib/psql/webready/{UK => unknown}/TypeAheadCounts.psql | 0
6 files changed, 0 insertions(+), 0 deletions(-)
rename Model/lib/psql/webready/{UK => unknown}/AssociatedDataset.psql (100%)
rename Model/lib/psql/webready/{UK => unknown}/DomainAssignment.psql (100%)
rename Model/lib/psql/webready/{UK => unknown}/EdaGeneGraph.psql (100%)
rename Model/lib/psql/webready/{UK => unknown}/PANResults.psql (100%)
rename Model/lib/psql/webready/{UK => unknown}/ProjectTaxon.psql (100%)
rename Model/lib/psql/webready/{UK => unknown}/TypeAheadCounts.psql (100%)
diff --git a/Model/lib/psql/webready/UK/AssociatedDataset.psql b/Model/lib/psql/webready/unknown/AssociatedDataset.psql
similarity index 100%
rename from Model/lib/psql/webready/UK/AssociatedDataset.psql
rename to Model/lib/psql/webready/unknown/AssociatedDataset.psql
diff --git a/Model/lib/psql/webready/UK/DomainAssignment.psql b/Model/lib/psql/webready/unknown/DomainAssignment.psql
similarity index 100%
rename from Model/lib/psql/webready/UK/DomainAssignment.psql
rename to Model/lib/psql/webready/unknown/DomainAssignment.psql
diff --git a/Model/lib/psql/webready/UK/EdaGeneGraph.psql b/Model/lib/psql/webready/unknown/EdaGeneGraph.psql
similarity index 100%
rename from Model/lib/psql/webready/UK/EdaGeneGraph.psql
rename to Model/lib/psql/webready/unknown/EdaGeneGraph.psql
diff --git a/Model/lib/psql/webready/UK/PANResults.psql b/Model/lib/psql/webready/unknown/PANResults.psql
similarity index 100%
rename from Model/lib/psql/webready/UK/PANResults.psql
rename to Model/lib/psql/webready/unknown/PANResults.psql
diff --git a/Model/lib/psql/webready/UK/ProjectTaxon.psql b/Model/lib/psql/webready/unknown/ProjectTaxon.psql
similarity index 100%
rename from Model/lib/psql/webready/UK/ProjectTaxon.psql
rename to Model/lib/psql/webready/unknown/ProjectTaxon.psql
diff --git a/Model/lib/psql/webready/UK/TypeAheadCounts.psql b/Model/lib/psql/webready/unknown/TypeAheadCounts.psql
similarity index 100%
rename from Model/lib/psql/webready/UK/TypeAheadCounts.psql
rename to Model/lib/psql/webready/unknown/TypeAheadCounts.psql
From d5798edb5a5f91f3a145ff388f0b9f309c0e21bc Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 20 May 2025 14:46:46 -0400
Subject: [PATCH 027/112] prune tuning manager file
---
.../xml/tuningManager/apiTuningManager.xml | 10136 +++-------------
Model/lib/xml/tuningManager/tablePruning.txt | 4 +-
2 files changed, 1783 insertions(+), 8357 deletions(-)
diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml
index b7a4f3217b..6a783fcb6d 100644
--- a/Model/lib/xml/tuningManager/apiTuningManager.xml
+++ b/Model/lib/xml/tuningManager/apiTuningManager.xml
@@ -3,742 +3,6 @@
-
- Locations and Sequence of Transmembrane Domains (TMHMM)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Locations and Sequence of Signal Peptide Domains (SignalP)
-
-
-
-
-
-
-
-
-
- = .5
- OR spf.signal_probability IS NULL
- OR ((spf.means_score + spf.maxy_score) / 2) >= .5
- OR ( spf.maxy_conclusion + spf.maxc_conclusion + spf.maxs_conclusion + spf.means_conclusion ) >= 3
- )
- ORDER BY
- spf.aa_sequence_id, spf.aa_feature_id
- ]]>
-
-
-
-
-
-
-
-
-
-
- Taxon ranks for organisms
-
-
-
-
-
-
-
-
-
-
-
- Attributes for Metabolic Pathways
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- synteny stats for each reference-taxon / comparison-taxon pair
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- the max and min depth of each ontology term in OntologyRelationship. Used by the GoTermSummary tuning table
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- GeneGoTerms: each row represents one GO term assignment to one gene, right from what was loaded.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- A tuning table for the gene record GO term table
-
-
-
-
-
-
-
-
-
-
-
-
Map each GO term that is assigned to at least one gene to a GoSubset term
@@ -817,88 +81,6 @@
-
- GoTermSummary: each row represents one GO term assignment to one gene.
- (Typically, a gene has multiple such assignments.) This is used for
- finding gene-GO mappings, such as for the gene-page GO table.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
Each row maps a dataset onto an ID for which the dataset contains data;
each dataset gets one such row.
@@ -930,38 +112,6 @@
-
-
Stores per-organism information. Used by the organism record, as well
as by project_id(), the function that maps an organism to a project.
@@ -1320,7361 +470,2024 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id)
-
-
- Each record maps a gene to a PDB structure. Used by the model to find
- genes that have a PDB structure and to find the PDB structures for a
- given gene.
-
-
-
-
-
-
-
-
-
-
-
-
- GeneId maps any valid ID for a gene onto its official ID. These two quantities
- are stored in the "id" and "gene" columns, respectively. The "unique_mapping"
- column is set to 1 for IDs which map to only one gene.
-
- Most of the CREATE TABLE statement is made up of the union of nine subqueries,
- each of which looks in a different place for gene IDs. Each subquery populates
- the "union_member" field with a different literal string, to make it easier to
- understand which part (or parts) of the SQL is responsible for each ID-to-gene
- mapping.
+
+
+
+ Stores, for each transcript, a string containing the gene-relative coordinates
+ of all its introns and UTRs.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- = pred_loc.start_min
- AND pred_loc.is_reversed = gene_loc.is_reversed
- AND pred_loc.external_database_release_id = edr.external_database_release_id
- AND edr.external_database_id = ed.external_database_id
- UNION
- SELECT ng.name AS id, gf.source_id AS gene,
- 'NaGene' as union_member, ed.name as database_name /* dots.NaGene.name */
- FROM dots.GeneFeature gf, dots.NaFeatureNaGene nfng, dots.NaGene ng,
- sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
- WHERE gf.na_feature_id = nfng.na_feature_id
- AND ng.na_gene_id = nfng.na_gene_id
- AND gf.external_database_release_id = edr.external_database_release_id
- AND edr.external_database_id = ed.external_database_id
- UNION
- SELECT source_id AS id, source_id AS gene,
- 'same ID' as union_member, ed.name as database_name /* same ID (reflexive mapping) */
- FROM dots.GeneFeature gf,
- sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
- WHERE gf.external_database_release_id = edr.external_database_release_id
- AND edr.external_database_id = ed.external_database_id
- UNION
- SELECT n.name AS id, gf.source_id AS gene,
- 'gene name' as union_member, d.name as database_name -- apidb.GeneFeatureName.name
- from dots.genefeature gf, sres.ExternalDatabaseRelease r, sres.ExternalDatabase d,
- ( select na_feature_id, name
- from apidb.GeneFeatureName
- where is_preferred = 1
- EXCEPT
- -- suppress gene/name associations from the *DELETED_RSRC databases
- select gfn.na_feature_id, gfn.name
- from apidb.GeneFeatureName gfn,
- sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
- where gfn.external_database_release_id = edr.external_database_release_id
- and ed.external_database_id = edr.external_database_id
- and ed.name like '%DELETED_RSRC'
- ) n
- where n.na_feature_id = gf.na_feature_id
- and gf.external_database_release_id = r.external_database_release_id
- and r.external_database_id = d.external_database_id
- UNION
- select dr.primary_identifier as id,
- gf.source_id as gene,
- 'AA feature DbRef primary ID' as union_member,
- ed.name as database_name /* DbRef.primary_identifier mapped through DbRefAaFeature */
- from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf,
- dots.DbRefAaFeature draf, sres.DbRef dr,
- sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
- where gf.na_feature_id = t.parent_id
- and t.na_feature_id = taf.na_feature_id
- and taf.aa_feature_id = draf.aa_feature_id
- and draf.db_ref_id = dr.db_ref_id
- and dr.external_database_release_id = edr.external_database_release_id
- and edr.external_database_id = ed.external_database_id
- and ed.name
- not in ('INTERPRO', 'PFAM', 'PIRSF', 'PRODOM', 'PROSITEPROFILES',
- 'SMART', 'SUPERFAMILY', 'TIGRFAM', 'CDD','HAMAP','HMMPANTHER',
- 'PRINTS','SCANPROSITE','SFLD')
- ) mapping,
- dots.GeneFeature gf, dots.NaSequence ns
- WHERE mapping.gene = gf.source_id
- AND gf.na_sequence_id = ns.na_sequence_id
- AND (ns.taxon_id::varchar = '&filterValue' or length('&filterValue') = 0)
- AND (gf.is_predicted != 1 OR gf.is_predicted is null)
- GROUP BY mapping.id, mapping.gene
- ]]>
-
-
-
-
+
-
-
- ]]>
-
-
-
-
+
+
+
+
+
+ Stores special webservice abbreviations which are not standard organism
+ names. Each record maps an organism name onto this abbreviation, as
+ well as the species name and project ID. Used by the model and as an
+ input in the creation of the OrganismAbbreviationBlast tuning table.
+ Propagated to portal instances.
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- This table maps IDs for a sequence onto the official ID of the sequence.
- It is analogous to GeneId, which does the same thing for genes. Used by
- genomic-sequence record queries, by the sequence retrieval tool, and by
- the BasketFixer, which updates users' baskets at release time to replace
- old IDs with updated ones.
+
+
+
+ Group species by higher level taxonomy. Each row associates a taxon of
+ interest with one of its ancestors in the taxon tree. Used in parameter
+ queries that have to know about the taxon tree. Propagated to portal
+ instances.
-
-
-
-
-
-
+
+
+
+
-
-
-
-
- Used by GeneTables.Epitopes to map a gene to its epitopes.
-
-
-
-
-
-
-
-
-
- Stores (transcript, sequence, distance from centromere) 3-tuples for transcripts
- that lie on a sequence for which we have a centomere location.
+
+ Each record maps an organism to its BLAST abbreviation. Used by
+ BLAST-query parameters. Propagated to portal instances.
-
-
-
-
-
-
+
+
+
+
-
- The BFMV for proteins. Each protein gets a single record, which
- stores all its attributes. Used mainly to create TranscriptAttributes
+
+ For each project, show which BLAST databases are available for which
+ species. Used in BLAST param queries. Propagated to portal instances.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+ Each row stores mass-spec. based expression evidence for one sample of
+ one experiment for one gene. Used for mass spec queries in the model,
+ GBrowse, and PBrowse, and also in the creation of the MSTranscriptSummary
+ tuning table.
+
+
+
+
+
+
+
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
-
- The BFMV for the gene record. Each gene gets a single record, which
- stores all its attributes. Used widely, in the model and elsewhere, for
- queries involving genes, as well as in the creation of more than a
- dozen other tuning tables.
+
+ Stores summary information from annotated genomes to support the overview section of the gene page
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
-
-
-
-
-
-
-
- 10 )
- WHERE ta.project_id = 'TriTrypDB'
- ]]>
-
-
-
-
-
-
-
-
-
-
+
+ Mass-spec experiment results for a peptide. Used by the model, GBrowse,
+ and PBrowse.
+
+
+
+
+
+
+
-
-
- ]]>
-
-
-
-
+
+
+ Data from the Seattle Structural Genomics Center for Infectious Disease,
+ populated from their web service. Used in the gene record.
+
+
-
+
+ Used by the model and GBrowse, and as an input in the creation of
+ tuning tables such as MSModifiedPeptideSummary and MSPeptideSummary.
-
-
-
-
-
-
+
- UNION
- SELECT gene_source_id, source_id, taxon_id, organism, ontology_term_name
- , string_value, number_value
- FROM (
- select atr.gene_source_id, atr.source_id, atr.taxon_id, atr.organism,
- 'long_transcript_novelty' as ontology_term_name, ltr.transcript_novelty string_value,
- null as number_value, sum(counts.reads) as total_reads, ltr.transcript_length
- from TranscriptAttributes atr
- , apidb.longreadtranscript ltr
- , JSON_TABLE(count_data, '$.*' COLUMNS (reads INTEGER PATH '$')) counts
- where ltr.gene_source_id = atr.gene_source_id
- AND ltr.transcript_length >= 20
- GROUP BY atr.gene_source_id, atr.source_id, atr.taxon_id, atr.organism, ltr.transcript_novelty, ltr.transcript_length
- )
- WHERE total_reads >= 5
- UNION
- select atr.gene_source_id, atr.source_id, atr.taxon_id, atr.organism,
- 'intron_junction' as ontology_term_name, it.string_value string_value,
- null as number_value
- from
- IntronSupportLevel it
- ,TranscriptAttributes atr
- where it.gene_source_id = atr.gene_source_id
+
+
+ Used by the model when writing profile data
+
+
+
- UNION
- select atr.gene_source_id, atr.source_id, atr.taxon_id, atr.organism,
- 'unique_reads' as ontology_term_name, null as string_value , gj.total_unique number_value
- from
- GeneIntronJunction gj
- ,TranscriptAttributes atr
- where gj.gene_source_id = atr.gene_source_id
- )
- ]]>
-
+
+
+ Associates an organism with the GBrowse and PBrowse tracks available
+ for it. Used by the gene record.
+
+
+
+
--->
-
-
- Stores, for each transcript, a string containing the gene-relative coordinates
- of all its introns and UTRs.
+
+
+ Each row maps a dataset onto an ID for which the dataset contains data;
+ each dataset gets one such row.
+ Used in dataset record queries.
-
-
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
- A single product string per gene
-
-
-
-
-
-
-
-
+
+ Citation info for proteomics datasets, used by GBrowse
+
+
' || sample || '
' as sample_i
+ FROM MSPeptideSummary mps, DatasetPresenter ds
+ -- consider using the tuning table ExternalDbDatasetPresenter instead of the LIKE below, if its performance is a problem
+ WHERE (ds.name = mps.external_database_name or mps.external_database_name like ds.dataset_name_pattern)
+ ) t
+ group by name, id
+ )
+ SELECT name, -- citation text: dataset summary, contact email, PMIDs, sample table, subtrack note
+ substr(description, 1, 4000) || ' Primary Contact Email: '|| coalesce(email, 'unavailable') -- substr(string, start, count): keep the first 4000 characters of the summary
+ || ' PMID: ' || publications || 'Samples:
'
+ || sample_table || chr(10) ||
+ ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation
+ FROM (
+ SELECT ds.name as name, ds.summary as description, pubs.contact_email as email,
+ pubs.pmids as publications, samples.sample_table as sample_table
+ FROM DatasetPresenter ds, pubs, samples
+ WHERE ds.dataset_presenter_id = pubs.id -- pubs and samples are both keyed by dataset_presenter_id
+ AND ds.dataset_presenter_id = samples.id
+ ) t
]]>
-
-
- ]]>
-
+
+
+
+
+
+
+
+
+
+
+
-
- The BFMV for the gene record. Each gene gets a single record, which
- stores all its attributes.
-
+
+
+
+
-
-
-
-
-
-
-
+
+
+
-
-
- ]]>
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record represents one SNP. Widely used in the model, as well as in
- the creation of several other tuning tables, Includes only NGS SNPs.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each row represents one EST. Used widely in the model, and to make the
- tuning tables BlastTypes, OrganismAbbreviationBlast, and OrganismAttributes.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each row represents a colocated EST alignment - gene pair. Used by the
- model, by generateGeneMetrics, and in the creation of the
- OrganismAttributes tuning table
-
-
-
-
-
-
-
-
-
-
-
-
- = 0
- AND query_sequence.na_sequence_id = ba.query_na_sequence_id
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each row represents one cosmid or bac end feature; for use in JBrowse.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record captures info for a strain/protocol app node. Used in the model, including
- gene and SNP queries, as well as the gene record.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record captures info for a strain/protocol app node. Used in the model, including
- gene and SNP queries, as well as the gene record.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- The BFMV for the WDK popset record. Widely used in the model for
- queries related to popsets.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Used by the GenesByChipChip(Plasmo|Toxo) query, as well as by
- generateGeneMetrics. Also an input to OrganismAttributes.
-
-
-
-
-
-
- 0 */
- CASE WHEN ta.is_reversed = 0
- THEN ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)
- ELSE ta.end_max - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)
- END > 0
- THEN
- CASE
- WHEN ta.is_reversed = 0
- THEN '-'
- ELSE '+'
- END
- ELSE
- CASE
- WHEN ta.is_reversed = 1
- THEN '-'
- ELSE '+'
- END
- END as direction,
- sr.score1 as score
- FROM TranscriptAttributes ta,
- Results.segmentresult sr,
- Study.StudyLink sl,
- Study.Study s
- WHERE sr.na_sequence_id = ta.na_sequence_id
- AND s.study_id = sl.study_id
- AND sl.protocol_app_node_id = sr.protocol_app_node_id
- AND lower(s.name) like '%chip%peaks'
- AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.start_min) <= 3000)
- or (ta.is_reversed = 1 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.end_max) <= 3000) )
- ]]>
-
-
-
-
-
-
-
-
-
-
- Used by gene queries, as well as by generateGeneMetrics. Also an input
- to OrganismAttributes.
-
-
-
-
-
- 0 */
- CASE WHEN ga.is_reversed = 0
- THEN ga.start_min - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)
- ELSE ga.end_max - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)
- END > 0
- THEN
- CASE
- WHEN ga.is_reversed = 0
- THEN '-'
- ELSE '+'
- END
- ELSE
- CASE
- WHEN ga.is_reversed = 1
- THEN '-'
- ELSE '+'
- END
- END as direction,
- aef.*
- FROM dots.BindingSiteFeature aef,
- apidb.FeatureLocation arrloc,
- GeneAttributes ga
- WHERE aef.na_feature_id = arrloc.na_feature_id
- AND arrloc.na_sequence_id = ga.na_sequence_id
- AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000)
- or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) )
- ]]>
-
-
-
-
-
-
-
-
-
-
-
- Each record maps a gene onto a subcellular location. Used by
- GenesBySubcellularLocalization.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Like dots.SimilaritySpan, except that for sequences that are mapped by
- SequencePiece into parts of other sequences, both locations are stored.
- Used by GBrowse, and also in the creation of the Blastx tuning table.
-
-
-
-
-
-
-
- = sim.max_query_end
- AND sim.query_id = contig.na_sequence_id
- AND sp.virtual_na_sequence_id = scaffold.na_sequence_id
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
- SNP Chip only, such as Plasmo barcode, 3k_chp and hd_array
-
- Each record represents one SNP. Widely used in the model, as well as in
- the creation of several other tuning tables
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- this otherwise-unneeded tuning table, which depends on SnpAttributesDoTS,
- exists so that the view SnpChipAttributes can be created as a side-effect
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record stores a Blastp similarity of a gene. Used by the gene-page
- Blastp table.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record stores a colocated (gene, popset) 2-tuple. Used by the
- gene page as well as the PopsetByOverlap query.
-
-
-
-
-
- sim.min_subject_start
- AND sim.query_id = ia.na_sequence_id
- GROUP BY ia.source_id, fl.feature_source_id
- ]]>
-
-
-
-
-
-
-
-
-
-
-
- Each record maps a taxon_id of interest onto the taxon_id of that taxon's
- taxon-tree ancestor whose rank is "species". Used by
- gene queries, and as an input in the
- creation of several tuning tables, including GeneAttributes
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record stores a Blastx similarity. Used by GBrowse for the
- match:WU_BLASTX track.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record stores the transcript sequence of one gene. Used by the
- gene record and the sequence retrieval tool. Propagated to the portal.
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record stores the coding sequence of one gene. Used by the
- gene record and the sequence retrieval tool. Propagated to the portal.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record stores the coding sequence of one gene. Used by the
- gene record and the sequence retrieval tool, as well as by
- buildTrackOldAnnotationTT. Propagated to the portal.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record stores the nucleotide sequence for one genomic sequence
- that is "official" (in the sense that it can be instantiated as a WDK
- sequence record. Used by generatePathoLogicFile and the sequence
- retrieval tool Propagated to portal instances.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record stores the nucleotide sequence of an EST, for use by the
- relevant attribute query in the WDK EST record. Propagated to portal
- instances.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Mapping table of experiment and sample names to junction protocol_app_node_id
-
-
-
-
-
-
-
-
-
-
-
- = 1
- GROUP BY protocol_app_node_id
- ), part AS (
- SELECT
- ij.junctions_pan_id, ij.avg_value, stats.multiplier
- , max(ij.expression_pan_id) OVER w as max_exp_pan_id
- , max(ij.sample_name) OVER w as max_sample_Name
- , max(ij.exp_name) OVER w as max_exp_name
- FROM ij, stats
- WHERE ij.junctions_pan_id = stats.protocol_app_node_id
- WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
- )
- SELECT DISTINCT * FROM (
- SELECT junctions_pan_id
- , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
- , first_value(max_sample_name) OVER w1 as sample_name
- , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
- , multiplier
- FROM part
- WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
- ) t
- ORDER BY junctions_pan_id
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Table collects up single row / intronjunction (identified as all junctions with same start, end and strand). Statistics are generated including percentages of max intron score and ratios vs expression on an overall level.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 500000 THEN 500000 ELSE step_mult END as seq_step_mult
- FROM (
- SELECT gs.na_sequence_id, gs.length, gs.taxon_id, 25000 * (1 + floor(gs.length/count(*))) as step_mult
- FROM apidb.intronjunction ij, dots.nasequence gs
- WHERE gs.na_sequence_id = ij.na_sequence_id
- GROUP BY gs.na_sequence_id, gs.length, gs.taxon_id
- ) t
- ORDER BY taxon_id
- )
- LOOP
- iter_length := idlist.seq_step_mult;
- i_first_pos := 1;
- i_last_pos := i_first_pos + iter_length;
- WHILE i_first_pos < idlist.length
- LOOP
- INSERT INTO GIJtmp
- SELECT DISTINCT
- junc.*,
- CASE
- WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed
- THEN 1
- ELSE 0
- END as matches_gene_strand,
- last_value(ga.source_id) over w1 as gene_source_id,
- last_value(ga.na_feature_id) over w1 as gene_na_feature_id,
- CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron
- FROM (
- SELECT ij.na_sequence_id,seq.source_id as sequence_source_id,ij.segment_start,ij.segment_end,
- sum(ij.unique_reads) as total_unique, round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm,
- ij.is_reversed,seq.source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id
- FROM apidb.intronjunction ij, namemappinggij je, dots.nasequence seq
- WHERE ij.na_sequence_id = idlist.na_sequence_id
- AND ij.segment_start between i_first_pos and i_last_pos
- AND ij.na_sequence_id = seq.na_sequence_id
- AND ij.unique_reads >= 1
- AND je.junctions_pan_id = ij.protocol_app_node_id
- AND je.multiplier < 20
- GROUP BY ij.na_sequence_id,ij.segment_start,ij.segment_end, ij.is_reversed,seq.source_id
- ) junc
- LEFT JOIN GeneIdLocGIJ&1 ga ON
- junc.na_sequence_id = ga.na_sequence_id
- AND junc.segment_start >= ga.start_min
- AND junc.segment_end <= ga.end_max
- AND junc.is_reversed = ga.is_reversed
- LEFT JOIN annotgij ag ON
- junc.na_sequence_id = ag.na_sequence_id
- AND junc.segment_start = ag.start_min
- AND junc.segment_end = ag.end_max
- AND junc.is_reversed = ag.is_reversed
- WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron')
- WINDOW w1 AS (
- PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type
- ORDER BY ga.total_expression ASC
- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
- )
- ;
- commit;
- i_first_pos := i_last_pos + 1;
- i_last_pos := i_first_pos + iter_length;
- END LOOP;
- END LOOP;
- END;
- $$ LANGUAGE PLPGSQL;
- ]]>
-
-
-
-
-
- 0 THEN round((junc.total_isrpm / maxv.max_isrpm) * 100,2) ELSE null END as percent_max,
- CASE WHEN maxv.gene_source_id is not null THEN 1 ELSE 0 END as contained,
- CAST (null as numeric(10)) as taxon_id,
- cast (null as numeric(10)) as upstream_gene_id,
- cast (null as numeric) as upstream_distance,
- cast (null as numeric(10)) as downstream_gene_id,
- cast (null as numeric) as downstream_distance
- FROM
- gijtmp junc LEFT JOIN
- (
- SELECT gene_source_id,max(total_unique) as max_unique, max(total_isrpm) as max_isrpm
- FROM gijtmp
- WHERE gene_source_id is not null
- GROUP BY gene_source_id
- ) maxv ON junc.gene_source_id = maxv.gene_source_id
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Stores statistics for annotated introns used for configuring JBrowse tracks based on organism.
-
-
-
-
-
-
-
-
-
-
-
- Stores maximum values per gene for each sample so percent max intron can be computed for sample table.
-
-
-
-
-
-
-
-
- = j.segment_end
- AND ga.is_reversed = j.is_reversed
- AND j.protocol_app_node_id = mult.junctions_pan_id
- GROUP BY j.protocol_app_node_id, ga.source_id
- );
- commit;
- END LOOP;
- END;
- $$ LANGUAGE PLPGSQL;
- ]]>
-
-
-
-
-
-
-
- Each record stores the nucleotide sequence of one popset. Used in the
- relevant attribute query of the WDK popset record, as well as by
- PopsetClustalOmega. Propagated to portal instances.
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record maps an organism name onto an abbreviation, getting the
- pair either from apidb.Organism or (in the case of Tvag), hardwired
- into the below SQL. This table will eventually be replaced by workflow.
-
-
-
-
-
-
-
-
-
-
- Stores special webservice abbreviations which are not standard organism
- names. Each record maps an organism name onto this abbreviation, as
- well as the species name and project ID. Used by the model and as an
- input in the creation of the OrganismAbbreviationBlast tuning table.
- Propagated to portal instances.
-
-
-
-
-
-
-
-
-
-
- Groups projects by higher level taxonomy. Used in the creation of the
- OldOrganismTree tuning table.
-
-
-
-
-
-
-
-
-
- Group species by higher level taxonomy. Each row associates a taxon of
- interest with one of its ancestors in the taxon tree. Used in parameter
- queries that have to know about the taxon tree, as well as
- apidb.project_id(), the function that maps an organism name to a
- project. Propagated to portal instances.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Group species by higher level taxonomy. Each row associates a taxon of
- interest with one of its ancestors in the taxon tree. Used in parameter
- queries that have to know about the taxon tree. Propagated to portal
- instances.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record maps an organism to its BLAST abbreviation. Used by
- BLAST-query parameters. Propagated to portal instances.
-
-
-
-
-
-
-
-
-
-
-
-
- For each project, show which BLAST databases are available for which
- species. Used in BLAST param queries. Propagated to portal instances.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Properties table for ChEBI Compounds.
-
-
-
-
-
-
-
-
-
-
-
- The BFMV for the compound WDK record type. Used by the model for the
- compound record and queries, as well as in the creation of the
- PathwayCompounds tuning table. Propagated to portal instances.
- Note: children of ChEBI compounds are excluded, but data of these is gathered in the (parent) entries.
-
-
-
-
-
-
-
-
-
-
-
-
- Alias table for Compounds.
- Compound column is the source_id from CompoundAttributes (chEBI_ID).
- ID can have this same ID, or mapping KEGG ID, or Name, or Synonym.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Each record represents a 5-tuple of (reaction, compound, pathway,
- enzyme, type). Used extensively in the model for pathway-related
- queries, as well as by getImageMap.pl.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Aggregates reactions irrespective of pathway. Required to determine if BioCyc reactions are reversible. Used extensively in the model in conjunction with pathwaycompounds for pathway related queries
-
-
-
-
-
-
-
-
-
- ' || o.enzyme || '' ELSE o.enzyme END as expasy_html
- FROM (
- SELECT i.*
- , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-'
- THEN
- 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1='
- || ec.ec_number_1
- || CASE ec.ec_number_2 WHEN null THEN null ELSE chr(38) || 'field2=' || ec.ec_number_2 END
- || CASE ec.ec_number_3 WHEN null THEN null ELSE chr(38) || 'field3=' || ec.ec_number_3 END
- || CASE ec.ec_number_4 WHEN null THEN null ELSE chr(38) || 'field4=' || ec.ec_number_4 END
- ELSE reaction_url END as expasy_url
- , ec.description as enzyme_description
- FROM (
- SELECT
- reaction_id
- , reaction_source_id
- , reaction_url
- , ext_db_name
- , ext_db_version
- , enzyme
- , substrates_html || ' ' || sign || ' ' || products_html as equation_html
- , substrates_text || ' ' || sign || ' ' || products_text as equation_text
- , case when sign = '<=>' then 1 else 0 end as is_reversible
- , substrates_text
- , products_text
- FROM (
- SELECT
- reaction_id
- , reaction_source_id
- , reaction_url
- , ext_db_name
- , ext_db_version
- , enzyme
- , (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like '%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign
- , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html
- , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text
- , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html
- , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text
- FROM (
- WITH rep AS (
- SELECT DISTINCT
- pr.PATHWAY_REACTION_ID as reaction_id
- , pr.SOURCE_ID as reaction_source_id
- , pn.DISPLAY_LABEL as enzyme
- , coalesce(ca.compound_name, pc.compound_source_id) as compound
- , prel.is_reversible as is_reversible_og
- , last_value(prel.is_reversible) OVER (partition by pr.pathway_reaction_id ORDER BY prel.is_reversible ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) as is_reversible
- , first_value(pc.type) over (partition by pr.pathway_reaction_id, pr.SOURCE_ID, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE, coalesce(pc.chebi_accession, pc.compound_source_id), coalesce(ca.compound_name, pc.compound_source_id) ORDER BY pc.pathway_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as type
- FROM
- sres.pathway p
- , apidb.pathwayreaction pr
- , APIDB.PATHWAYREACTIONREL prr
- , SRES.PATHWAYNODE pn
- , SRES.PATHWAYRELATIONSHIP prel
- , SRES.ONTOLOGYTERM ot
- , PathwayCompounds pc
- LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id
- WHERE p.PATHWAY_ID = prr.PATHWAY_ID
- AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
- AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
- AND prel.NODE_ID = pn.PATHWAY_NODE_ID
- AND ot.name = 'enzyme'
- AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
- AND pc.PATHWAY_ID = p.PATHWAY_ID
- AND pc.REACTION_id = pr.PATHWAY_REACTION_ID
- )
- SELECT DISTINCT
- pr.PATHWAY_REACTION_ID as reaction_id
- , pr.SOURCE_ID as reaction_source_id
- , ed.NAME as ext_db_name
- , edr.VERSION as ext_db_version
- , cast(pn.DISPLAY_LABEL as varchar(20)) as enzyme
- , min(rep.is_reversible) as is_reversible
- , min(rep.type) as type
- , string_agg (pc.type, ',' order by p.pathway_id) as type_list
- , coalesce(ca.compound_name, pc.compound_source_id) as compound
- , CASE
- WHEN coalesce(pc.CHEBI_ACCESSION, pc.compound_source_id) LIKE 'CHEBI%'
- THEN '' || coalesce(ca.compound_name, pc.compound_source_id) || ''
- ELSE coalesce(pc.chebi_accession, pc.compound_source_id)
- END as compound_url
- , CASE (replace (replace (ed.name, 'Pathways_', ''), '_RSRC', ''))
- WHEN 'KEGG' THEN 'https://www.genome.jp/dbget-bin/www_bget?rn:' || pr.source_id
- WHEN 'MetaCyc' THEN 'https://metacyc.org/META/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id
- WHEN 'TrypanoCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/TRYPANO/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id
- WHEN 'LeishCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/LEISH/new-image?tyrp=REACTION' || chr(38) || 'object=' || pr.source_id
- WHEN 'FungiCyc' THEN NULL
- END as reaction_url
- FROM
- sres.pathway p
- , apidb.pathwayreaction pr
- , APIDB.PATHWAYREACTIONREL prr
- , SRES.PATHWAYNODE pn
- , SRES.PATHWAYRELATIONSHIP prel
- , SRES.EXTERNALDATABASE ed
- , SRES.EXTERNALDATABASERELEASE edr
- , SRES.ONTOLOGYTERM ot
- , rep
- , PathwayCompounds pc
- LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id
- WHERE p.PATHWAY_ID = prr.PATHWAY_ID
- AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
- AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
- AND prel.NODE_ID = pn.PATHWAY_NODE_ID
- AND ot.name = 'enzyme'
- AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
- AND pc.EXT_DB_NAME = ed.NAME
- AND pc.EXT_DB_VERSION = edr.VERSION
- AND ed.EXTERNAL_DATABASE_ID = edr.EXTERNAL_DATABASE_ID
- AND pc.PATHWAY_ID = p.PATHWAY_ID
- AND pc.REACTION_id = pr.PATHWAY_REACTION_ID
- AND rep.reaction_id = pr.pathway_reaction_id
- AND rep.reaction_source_id = pr.source_id
- AND rep.compound = coalesce(ca.compound_name, pc.compound_source_id)
- AND rep.enzyme = pn.DISPLAY_LABEL
- AND rep.is_reversible_og = prel.is_reversible
- GROUP BY pr.pathway_reaction_id, pr.SOURCE_ID, ed.NAME, edr.VERSION, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE
- , coalesce(pc.chebi_accession, pc.compound_source_id)
- , coalesce(ca.compound_name, pc.compound_source_id)
- ) t1
- GROUP BY reaction_id, reaction_source_id, reaction_url, ext_db_name, ext_db_version, enzyme, is_reversible
- ) t2
- ) i
- LEFT OUTER JOIN sres.enzymeclass ec ON i.enzyme = ec.ec_number
- ) o
- ]]>
-
-
-
-
-
-
-
-
- Nodes and edges for pathway maps
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 1
- )
- SELECT aee.e_id, pn.*
- FROM pn
- , AllEnzymeEdges aee
- WHERE aee.all_edges = pn.all_edges
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Used by pathway table on gene pages
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- for use in GenesByReactionCompounds question
-
-
-
-
-
-
-
-
-
- Each row stores mass-spec. based expression evidence for one sample of
- one experiment for one gene. Used for mass spec queries in the model,
- GBrowse, and PBrowse, and also in the creation of the MSTranscriptSummary
- tuning table.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Stores summary information from annotated genomes to facilitate overview section of gene page
-
-
-
-
-
-
-
-
-
-
-
-
-
- Mass-spec experiment results for a transcript. Used in the model for queries
- related to transcripts.
-
-
-
-
-
-
-
-
-
-
-
-
- Mass-spec experiment results for a peptide. Used by the model, GBrowse,
- and PBrowse.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Data from the Seattle Structural Genomics Center for Infectious Disease,
- populated from their web service. Used in the gene record.
-
-
-
-
-
-
-
- Used by the model and GBrowse, as well as an input in the creation of
- the tuning tables like MSModifiedPeptideSummary and MSPeptideSummary.
-
-
-
-
-
-
-
- Used by the model when writing profile data
-
-
-
-
-
-
-
- Associates an organism with the GBrowse and PBrowse tracks available
- for it. Used by the gene record.
-
-
-
-
-
-
-
-
- Associates an organism with the GBrowse and PBrowse tracks available
- for it. Used by the gene record.
-
-
-
-
-
-
-
-
- Each row maps a dataset onto an ID for which the dataset contains data;
- each dataset gets one such row.
- Used in dataset record queries.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Citation info for proteomics datasets, used by GBrowse
-
-
-
- ' || sample || '' as sample_i
- FROM MSPeptideSummary mps, DatasetPresenter ds
- -- consider using the tuning table ExternalDbDatasetPresenter instead of the LIKE below, if its performance is a problem
- WHERE (ds.name = mps.external_database_name or mps.external_database_name like ds.dataset_name_pattern)
- ) t
- group by name, id
- )
- SELECT name,
- substr(description, 4000, 1) || ' Primary Contact Email: '|| coalesce(email, 'unavailable')
- || ' PMID: ' || publications || 'Samples:
'
- || sample_table || chr(10) ||
- ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation
- FROM (
- SELECT ds.name as name, ds.summary as description, pubs.contact_email as email,
- pubs.pmids as publications, samples.sample_table as sample_table
- FROM DatasetPresenter ds, pubs, samples
- WHERE ds.dataset_presenter_id = pubs.id
- AND ds.dataset_presenter_id = samples.id
- ) t
- ]]>
-
-
-
-
-
-
-
-
-
-
-
- = 1.5
- ) t
- GROUP BY gene_source_id, project_id, sequence_id, haplotype_block_name,
- start_min, end_max, start_max, end_min, organism
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- = commit_after THEN
- COMMIT;
- ctrows := 0;
- END IF;
- END LOOP;
- commit;
- END;
- $$ LANGUAGE PLPGSQL;
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- For all datasets, list all genes (source_id) of a gene_group where one of them (profile_graph_id)
- has data for a profile_set.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- for gene-page expression graphs
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- For each distinct organism in GeneAttributes, all ancestors in the taxon tree. For the gene page.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Materialization of GeneTables.GeneModelDump.
-
-
-
-
-
-
-
-
+ LOOP
+ ctrows := ctrows + 1;
+ INSERT INTO Profile&1
+ (DATASET_NAME, DATASET_TYPE, DATASET_SUBTYPE, PROFILE_TYPE, NODE_TYPE, SOURCE_ID, PROFILE_STUDY_ID, PROFILE_SET_NAME,
+ PROFILE_SET_SUFFIX, PROFILE_AS_STRING, MAX_VALUE, MIN_VALUE, MAX_TIMEPOINT, MIN_TIMEPOINT)
+ VALUES
+ (pf_rows.DATASET_NAME, pf_rows.DATASET_TYPE, pf_rows.DATASET_SUBTYPE, pf_rows.PROFILE_TYPE, pf_rows.NODE_TYPE, pf_rows.SOURCE_ID, pf_rows.PROFILE_STUDY_ID, pf_rows.PROFILE_SET_NAME,
+ pf_rows.PROFILE_SET_SUFFIX, pf_rows.PROFILE_AS_STRING, pf_rows.MAX_VALUE, pf_rows.MIN_VALUE, pf_rows.MAX_TIMEPOINT, pf_rows.MIN_TIMEPOINT);
+ IF ctrows >= commit_after THEN
+ COMMIT;
+ ctrows := 0;
+ END IF;
+ END LOOP;
+ commit;
+ END;
+ $$ LANGUAGE PLPGSQL;
+ ]]>
-
-
-
-
-
-
- Distinct filter_name for gene query summary. For the initial version,
- at least, it's (GeneAttriutes.species UNION GeneAttributes.organism).
- Note that the UNION implies set bahavior and therefore distinctness.
-
-
+ UPDATE Profile&1
+ SET dataset_name = 'tbruTREU927_Rijo_Circadian_Regulation_rnaSeq_RSRC'
+ WHERE dataset_name= 'tbruTREU927_RNASeq_Rijo_Circadian_Regulation_RSRC'
+ ]]>
-
-
- Info from dots.ExternalAaSequence records for BLAT alignments
-
-
-
-
-
+
+
-
-
- Materialization of the orthology transform. Also useful for GeneTables.Orthologs.
-
-
-
-
-
-
-
- = ga.start_min
- and sg.start_min <= ga.end_max
- ]]>
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
-
-
-
-
- For each RNA-Seq Dataset, compute the top 500 ratios of max/min gene expression.
-
-
-
-
-
-
-
-
-
-
-
- for GeneTables.RodMalPhenotypeTable
+
+ For all datasets, list all genes (source_id) of a gene_group where one of them (profile_graph_id)
+ has data for a profile_set.
-
-
-
-
+
+
results.source_id
- OR knockdown.source_id is null
- ORDER BY results.source_id, results.rmgmid
- ]]>
+ create table GeneGroupProfile&1 as
+ select distinct other_gene.source_id, p.dataset_name,
+ this_gene.source_id as profile_graph_id
+ from OrthologousTranscripts ot
+ , Profile p
+ , GeneAttributes this_gene
+ , GeneAttributes other_gene
+ where p.source_id = ot.source_id
+ and ot.source_id = this_gene.source_id
+ and ot.ortho_gene_source_id = other_gene.source_id
+ and this_gene.species = other_gene.species
+ and ot.is_syntenic = 1
+ union
+ select ga.source_id, p.dataset_name, p.source_id as profile_graph_id
+ from Profile p, GeneAttributes ga
+ where p.source_id = ga.source_id
+ ]]>
-
-
- Chromosome data for CNV queries
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Gene Data for CNV queries
-
-
-
+
+
+ for gene-page expression graphs
+
+
+
+
+
+
+
+
+ CREATE TABLE ExpressionGraphsData&1 as
+ -- RNASeq
+ SELECT distinct ga.source_id, dnt.dataset_presenter_id as dataset_id,
+ cast(case when ps.profile_set_suffix is null then ps.protocol_app_node_name
+ when ps.protocol_app_node_name ='value' then ps.profile_set_suffix
+ else ps.protocol_app_node_name || ' - ' || ps.profile_set_suffix end ||
+ case when substr(dp.value, 1, 10) in ('0', 'false') and pan.name like '% firststrand %' then '- sense'
+ when substr(dp.value, 1, 10) in ('0', 'false') and pan.name like '% secondstrand %' then '- antisense'
+ when substr(dp.value, 1, 10) in ('1', 'true') and pan.name like '% firststrand %' then '- antisense'
+ when substr(dp.value, 1, 10) in ('1', 'true') and pan.name like '% secondstrand %' then '- sense'
+ else '' end ||
+ case when pan.name like '% unique%' then ' - unique'
+ else '' end
+ as varchar(300)) AS sample_name,
+ round(nfe.value::numeric, 2) as value,
+ round(nfe.standard_error::numeric, 2) as standard_error,
+ round(nfe.percentile_channel1::numeric, 2) as percentile_channel1,
+ round(nfe.percentile_channel2::numeric, 2) as percentile_channel2,
+ ps.node_order_num,
+ ps.protocol_app_node_id
+ FROM results.NaFeatureExpression nfe, GeneAttributes ga, study.ProtocolAppNode pan,
+ ProfileSamples ps, study.nodeSet s,
+ sres.ExternalDatabaseRelease r, sres.ExternalDatabase d,
+ DatasetPresenter dnt, DatasetProperty dp
+ WHERE ga.na_feature_id = nfe.na_feature_id
+ AND nfe.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id = ps.protocol_app_node_id
+ AND ps.study_id = s.node_set_id
+ AND s.external_database_release_id = r.external_database_release_id
+ AND r.external_database_id = d.external_database_id
+ AND ps.dataset_name = dnt.name
+ AND dnt.dataset_presenter_id = dp.dataset_presenter_id
+ AND dp.property = 'switchStrandsProfiles'
+ AND ps.study_name not like '%cuff%'
+ AND ps.study_name not like '%htseq-intersection-nonempty%'
+ AND ps.study_name not like '%htseq-intersection-strict%'
+ UNION
+ -- Splice Site data
+ SELECT ga.source_id, dnt.dataset_presenter_id as dataset_id,
+ cast (case when ps.profile_set_suffix is null
+ then ps.protocol_app_node_name
+ else ps.protocol_app_node_name || ' ' || ps.profile_set_suffix
+ end as varchar(300)) AS sample_name,
+ round(nfe.value::numeric, 2) as value,
+ round(nfe.standard_error::numeric, 2) as standard_error,
+ round(nfe.percentile_channel1::numeric, 2) as percentile_channel1,
+ round(nfe.percentile_channel2::numeric, 2) as percentile_channel2,
+ ps.node_order_num,
+ ps.protocol_app_node_id
+ FROM results.NaFeatureExpression nfe, GeneAttributes ga, study.ProtocolAppNode pan,
+ ProfileSamples ps, study.nodeSet s,
+ sres.ExternalDatabaseRelease r, sres.ExternalDatabase d,
+ DatasetNameTaxon dnt,DatasetProperty dp
+ WHERE ga.na_feature_id = nfe.na_feature_id
+ AND nfe.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id = ps.protocol_app_node_id
+ AND ps.study_id = s.node_set_id
+ AND s.external_database_release_id = r.external_database_release_id
+ AND r.external_database_id = d.external_database_id
+ AND d.name = dnt.name
+ AND dnt.dataset_presenter_id = dp.dataset_presenter_id
+ AND dp.property = 'type' and substr(dp.value, 1, 10) = 'Splice Site'
+ UNION
+ -- microarray expression, quantitative proteomics, and eQTL
+ SELECT ga.source_id, dnt.dataset_presenter_id as dataset_id,
+ cast (case when ps.profile_set_suffix is null
+ then ps.protocol_app_node_name
+ else ps.protocol_app_node_name || ' ' || ps.profile_set_suffix
+ end as varchar(300)) AS sample_name,
+ CASE WHEN (d.NAME ='pfal3D7_quantitativeMassSpec_Apicoplast_ER_RSRC')
+ THEN round(nfe.VALUE::numeric, 6)
+ ELSE round(nfe.value::numeric, 2) END as value,
+ round(nfe.standard_error::numeric, 2) as standard_error,
+ round(nfe.percentile_channel1::numeric, 2) as percentile_channel1,
+ round(nfe.percentile_channel2::numeric, 2) as percentile_channel2,
+ ps.node_order_num,
+ ps.protocol_app_node_id
+ FROM results.NaFeatureExpression nfe, GeneAttributes ga, study.ProtocolAppNode pan,
+ ProfileSamples ps, study.nodeSet s,
+ sres.ExternalDatabaseRelease r, sres.ExternalDatabase d,
+ DatasetNameTaxon dnt, DatasetPresenter dsp
+ WHERE ga.na_feature_id = nfe.na_feature_id
+ AND nfe.protocol_app_node_id = pan.protocol_app_node_id
+ AND pan.protocol_app_node_id = ps.protocol_app_node_id
+ AND ps.study_id = s.node_set_id
+ AND s.external_database_release_id = r.external_database_release_id
+ AND r.external_database_id = d.external_database_id
+ AND d.name = dnt.name
+ AND dnt.dataset_presenter_id = dsp.dataset_presenter_id
+ AND dsp.subtype != 'rnaseq'
+ ]]>
+ ]]>
@@ -8690,90 +2503,6 @@ WHERE blat.query_na_sequence_id = etn.na_sequence_id
-
- for TranscriptAttributes.InterproColumns
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
annotation updates from Apollo
@@ -8783,37 +2512,6 @@ WHERE blat.query_na_sequence_id = etn.na_sequence_id
-
- semicolon-delimited list of formatted genomic locations for each gene
-
-
-
-
-
-
-
-
-
@@ -9017,32 +2715,6 @@ sub readClob {
-->
-
-
- One phylogenetic-profile string per ortholog group
-
-
-
-
-
-
-
-
-
-
-
@@ -9204,252 +2876,6 @@ sub readClob {
-
-
- Links AlphaFold entries to gene ids where Uniprot ids are directly assigned
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- Stores intron support for introns
-
-
-
-
-
-
-
- = CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= 2 /*stats.perc0005_annot_percent_max*/)
- ) t
- GROUP BY gene_source_id, ontology_term, intron_count
- ) t
- WHERE string_value = 'All'
-
- UNION
-
- SELECT gene_source_id
- , ontology_term
- , case when count(*) = intron_count THEN 'All-high'
- when count(*) = 0 THEN 'None'
- else 'Any-high' end as string_value
- FROM (
- SELECT gij.gene_source_id
- , 'intron_junction' as ontology_term
- , intronCount.intron_count
- FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
- , (SELECT count (*) as intron_count, source_id FROM apidbtuning.genemodeldump WHERE type = 'Intron' GROUP BY source_id) intronCount
- WHERE gij.gene_source_id = intronCount.source_id
- AND gij.na_sequence_id = stats.na_sequence_id
- AND gij.annotated_intron = 'Yes'
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= 2)
- ) t2
- GROUP BY gene_source_id, ontology_term, intron_count
-
- UNION
-
- SELECT gene_source_id
- , ontology_term
- , CASE WHEN count(*) = intron_count THEN 'All-low'
- WHEN count(*) = 0 THEN 'None'
- ELSE 'Any-low' END as string_value
- FROM (
- SELECT gij.gene_source_id
- , 'intron_junction' as ontology_term
- , intronCount.intron_count
- FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
- , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
- WHERE gij.gene_source_id = intronCount.source_id
- AND gij.na_sequence_id = stats.na_sequence_id
- AND gij.annotated_intron = 'Yes'
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max)
- AND gij.intron_feature_id not in (
- SELECT gij.intron_feature_id
- FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats
- WHERE gij.na_sequence_id = stats.na_sequence_id
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= 2)
- )
- ) t3
- GROUP BY gene_source_id, ontology_term, intron_count
-
- UNION
-
- SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-low') as string_value
- FROM (
- SELECT gene_source_id
- , ontology_term
- , case when count(*) = intron_count THEN 'All'
- when count(*) = 0 THEN 'None'
- else 'Any' end as string_value
- FROM (
- SELECT gij.gene_source_id
- , 'intron_junction' as ontology_term
- , intronCount.intron_count
- FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
- , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
- WHERE gij.gene_source_id = intronCount.source_id
- AND gij.na_sequence_id = stats.na_sequence_id
- AND gij.annotated_intron = 'Yes'
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max)
- AND gij.intron_feature_id not in (
- SELECT gij.intron_feature_id
- FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats
- WHERE gij.na_sequence_id = stats.na_sequence_id
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= 2)
- )
- ) t
- GROUP BY gene_source_id, ontology_term, intron_count
- ) t4
- WHERE string_value = 'All'
- ) t
-]]>
-
-
-
all products for each gene
diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt
index d7d5687d76..1806c9dd44 100644
--- a/Model/lib/xml/tuningManager/tablePruning.txt
+++ b/Model/lib/xml/tuningManager/tablePruning.txt
@@ -56,7 +56,7 @@ R
R
R
MO (replace study table with nodeset)
-MO (replace study table with nodeset)
+MO FIX (replace study table with nodeset)
MO
R (never used but possibly should add back?)
R
@@ -127,7 +127,7 @@ R (transcript attributes query needs to use n
K
MO
K
-MO (rm auto_lob; don't need to loop over chunks in postgres)
+MO FIX (rm auto_lob; don't need to loop over chunks in postgres)
MC
K
K
From 4e14421c572ef0b5e11900f42d5725062c983f24 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 20 May 2025 14:47:40 -0400
Subject: [PATCH 028/112] mv pathwaynodes
---
Model/lib/psql/webready/{global => orgSpecific}/PathwayNodes.psql | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename Model/lib/psql/webready/{global => orgSpecific}/PathwayNodes.psql (100%)
diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodes.psql
similarity index 100%
rename from Model/lib/psql/webready/global/PathwayNodes.psql
rename to Model/lib/psql/webready/orgSpecific/PathwayNodes.psql
From 82ce2967cbf8b49029c7c1102466676f27465342 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 20 May 2025 14:51:39 -0400
Subject: [PATCH 029/112] mv pathwaynodes
---
Model/lib/psql/webready/{orgSpecific => global}/PathwayNodes.psql | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename Model/lib/psql/webready/{orgSpecific => global}/PathwayNodes.psql (100%)
diff --git a/Model/lib/psql/webready/orgSpecific/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql
similarity index 100%
rename from Model/lib/psql/webready/orgSpecific/PathwayNodes.psql
rename to Model/lib/psql/webready/global/PathwayNodes.psql
From cfcb7e626b542f355c149d3b92ccf663b58f4ade Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 20 May 2025 16:49:25 -0400
Subject: [PATCH 030/112] adjust PathwayNodes
---
Model/lib/psql/webready/global/PathwayNodes.psql | 5 -----
1 file changed, 5 deletions(-)
diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql
index 87a255e6ae..781582fdf1 100644
--- a/Model/lib/psql/webready/global/PathwayNodes.psql
+++ b/Model/lib/psql/webready/global/PathwayNodes.psql
@@ -13,7 +13,6 @@
, pn.height
, pn.cellular_location
, ot.name AS type
- , NULL AS gene_count
, NULL AS default_structure
FROM sres.pathwaynode pn
INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
@@ -32,12 +31,10 @@
, pn.height
, pn.cellular_location
, ot.name AS type
- , count (tp.gene_source_id) as gene_count
, NULL AS default_structure
FROM sres.pathwaynode pn
INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
LEFT JOIN sres.enzymeclass ec ON pn.row_id = ec.enzyme_class_id
- LEFT JOIN :SCHEMA.PathwayNodeGene tp ON pn.pathway_node_id = tp.pathway_node_id
WHERE ot.name = 'enzyme'
GROUP BY pn.pathway_id
, pn.display_label
@@ -63,7 +60,6 @@
, pn.height
, pn.cellular_location
, ot.name AS type
- , NULL AS gene_count
, st.default_structure
FROM sres.pathwaynode pn
INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id
@@ -262,7 +258,6 @@
, nodes_with_parents.parent
, reaction_source_id
, coalesce(type, nodes_with_parents.node_type) AS node_type
- , pn.gene_count
, pn.default_structure
FROM
( SELECT e_id::varchar AS pathway_node_id
From 5f6cd2cbf088895047d6e1b180a3f8751ee20b32 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 20 May 2025 17:38:50 -0400
Subject: [PATCH 031/112] fix pathwaynodes
---
Model/lib/psql/webready/global/PathwayNodes.psql | 2 +-
Model/lib/xml/tuningManager/tablePruning.txt | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql
index 781582fdf1..eef89059da 100644
--- a/Model/lib/psql/webready/global/PathwayNodes.psql
+++ b/Model/lib/psql/webready/global/PathwayNodes.psql
@@ -70,7 +70,7 @@
FROM chebi.structures s
, (SELECT id
, compound
- FROM CompoundId
+ FROM :SCHEMA.CompoundId
WHERE type IN ('same ID', 'child ID')
) n
WHERE n.id = 'CHEBI:' || s.compound_id
diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt
index 1806c9dd44..11012b11d9 100644
--- a/Model/lib/xml/tuningManager/tablePruning.txt
+++ b/Model/lib/xml/tuningManager/tablePruning.txt
@@ -1,4 +1,4 @@
-MG
+K FIXFIX
MO
??
??
From 8cbe278745f17e8250284db3e940fad7cb85e532 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 20 May 2025 21:33:56 -0400
Subject: [PATCH 032/112] clean comparative and global
---
.../webready/comparative/AlphaFoldGenes.psql | 40 ++++++++----------
.../comparative/AlphaFoldGenes_ix.psql | 3 ++
.../comparative/GroupPhylogeneticProfile.psql | 10 +----
.../GroupPhylogeneticProfile_ix.psql | 3 ++
.../comparative/OrthologousTranscripts.psql | 41 ++++---------------
.../OrthologousTranscripts_ix.psql | 12 ++++++
.../webready/global/CompoundAttributes.psql | 4 +-
.../lib/psql/webready/global/CompoundId.psql | 2 +-
.../webready/global/PathwayReactions.psql | 2 +-
.../webready/global/SequenceAttributes.psql | 33 +--------------
.../global/SequenceAttributes_ix.psql | 31 ++++++++++++++
11 files changed, 81 insertions(+), 100 deletions(-)
create mode 100644 Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql
create mode 100644 Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql
create mode 100644 Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql
create mode 100644 Model/lib/psql/webready/global/SequenceAttributes_ix.psql
diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
index 164c150fcb..2e9c2c7e34 100644
--- a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
+++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
@@ -1,6 +1,6 @@
-
-
- CREATE TABLE uniprotGenes AS
+ drop table if exists :SCHEMA.uniprotgenes;
+
+ CREATE TABLE :SCHEMA.uniprotGenes AS
SELECT DISTINCT ed.name
, d.*
, edr.version
@@ -18,7 +18,7 @@
, sres.externaldatabaserelease edr
, dots.dbrefaafeature db
, dots.aafeature aa
- , ProteinAttributes pa
+ , :SCHEMA.ProteinAttributes pa
WHERE (ed.name = 'Uniprot/SWISSPROT' OR ed.name = 'Uniprot/SPTREMBL')
AND (edr.version = 'xrefuniparc' OR edr.version = 'xref_sprot_blastp' OR edr.version = 'xref_trembl_blastp')
AND edr.external_database_id = ed.external_database_id
@@ -40,7 +40,7 @@
, sres.externaldatabaserelease edr
, dots.dbrefnafeature db
, dots.nafeature na
- , TranscriptAttributes ta
+ , :SCHEMA.TranscriptAttributes ta
WHERE ed.name like '%_dbxref_%niprot_%RSRC'
AND edr.external_database_id = ed.external_database_id
AND d.external_database_release_id = edr.external_database_release_id
@@ -50,37 +50,34 @@
;
-
+ drop table if exists :SCHEMA.minrank;  -- terminate here so the CREATE that follows is parsed as its own statement
- CREATE UNLOGGED TABLE minRank AS (
+ CREATE UNLOGGED TABLE :SCHEMA.minRank AS (
SELECT gene_source_id
, MIN(rank) as min_rank
- FROM uniprotGenes upg
+ FROM :SCHEMA.uniprotGenes upg
WHERE hit_length is not null
GROUP BY gene_source_id
)
;
-
+ drop table if exists :SCHEMA.alphafoldhits;
- CREATE UNLOGGED TABLE alphaFoldHits AS (
+ CREATE UNLOGGED TABLE :SCHEMA.alphaFoldHits AS (
SELECT DISTINCT gene_source_id
, last_value(primary_identifier) over (PARTITION BY gene_source_id ORDER BY hit_length ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS uniprot_id
FROM (
SELECT upg.*
- FROM uniprotGenes upg
- , minRank
+ FROM :SCHEMA.uniprotGenes upg
+ , :SCHEMA.minRank
WHERE upg.gene_source_id = minRank.gene_source_id
AND upg.rank = minRank.min_rank
) t
)
-
;
-
-
- CREATE TABLE AlphaFoldGenes AS (
+ CREATE TABLE :SCHEMA.AlphaFoldGenes AS (
SELECT afh.gene_source_id
, af.uniprot_id
, af.source_id as alphafold_id
@@ -88,16 +85,13 @@
, af.first_residue_index
, af.last_residue_index
FROM apidb.alphafold af
- , alphaFoldHits afh
+ , :SCHEMA.alphaFoldHits afh
WHERE afh.uniprot_id = af.uniprot_id
)
;
-
-
- CREATE index AlphaFoldGenes_idx ON AlphaFoldGenes (gene_source_id, uniprot_id)
-
-
- ;
+ drop table if exists :SCHEMA.uniprotgenes;
+ drop table if exists :SCHEMA.minrank;  -- intermediate table; only feeds alphaFoldHits
+ drop table if exists :SCHEMA.alphafoldhits;
diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql
new file mode 100644
index 0000000000..df161ee060
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql
@@ -0,0 +1,3 @@
+ CREATE index AlphaFoldGenes_idx ON AlphaFoldGenes (gene_source_id, uniprot_id)
+
+ ;
diff --git a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
index a7d484850c..ddb118b5e4 100644
--- a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
+++ b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
@@ -1,10 +1,9 @@
-
- CREATE table GroupPhylogeneticProfile as
+ CREATE table :SCHEMA.GroupPhylogeneticProfile as
SELECT rep.orthomcl_name, pp.profile_string
FROM apidb.PhylogeneticProfile pp,
(SELECT orthomcl_name, max(source_id) as source_id
- FROM GeneAttributes
+ FROM :SCHEMA.GeneAttributes
GROUP BY orthomcl_name) rep
WHERE rep.source_id = pp.source_id
@@ -12,9 +11,4 @@
- create index group_pp_ix
- on GroupPhylogeneticProfile (orthomcl_name)
-
-
- ;
diff --git a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql
new file mode 100644
index 0000000000..1753b0f2db
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql
@@ -0,0 +1,3 @@
+ create index group_pp_ix
+ on :SCHEMA.GroupPhylogeneticProfile (orthomcl_name)
+ ;
diff --git a/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql
index d6e7191d1e..e4766f036f 100644
--- a/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql
+++ b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql
@@ -1,25 +1,19 @@
+ drop table if exists :SCHEMA.SyntenicPairs;
-
- create UNLOGGED table SyntenicPairs as
+ create UNLOGGED table :SCHEMA.SyntenicPairs as
select distinct ga.na_feature_id, sg.syn_na_feature_id
- from apidb.SyntenicGene sg, GeneAttributes ga
+ from apidb.SyntenicGene sg, :SCHEMA.GeneAttributes ga
where sg.na_sequence_id = ga.na_sequence_id
and sg.end_max >= ga.start_min
and sg.start_min <= ga.end_max
;
-
-
create index SynPair_idx
- on SyntenicPairs (na_feature_id, syn_na_feature_id)
-
-
+ on :SCHEMA.SyntenicPairs (na_feature_id, syn_na_feature_id)
;
-
-
- create table OrthologousTranscripts as
+ create table :SCHEMA.OrthologousTranscripts as
with all_pairs
as (select ga.source_id
, ga.project_id
@@ -34,8 +28,8 @@
, ota.organism as ortho_organism
, ota.taxon_id as ortho_taxon_id
, o.is_reference_strain
- from Geneattributes ga
- , TranscriptAttributes ota
+ from :SCHEMA.Geneattributes ga
+ , :SCHEMA.TranscriptAttributes ota
, apidb.Organism o
where ga.ORTHOMCL_NAME = ota.ORTHOMCL_NAME
and ota.taxon_id = o.taxon_id
@@ -49,25 +43,6 @@
left join syn_pairs
on all_pairs.na_feature_id = syn_pairs.na_feature_id
and all_pairs.ortho_na_feature_id = syn_pairs.syn_na_feature_id
-
- ;
-
-
-
- create index ot_idx
- on OrthologousTranscripts (source_id, project_id, is_syntenic desc, ortho_source_id,
- ortho_project_id, ortho_gene_source_id, ortho_product,
- ortho_name, ortho_organism, ortho_taxon_id, is_reference_strain)
-
-
- ;
-
-
-
- create index ot_smol_idx
- on OrthologousTranscripts (is_syntenic, ortho_taxon_id, source_id, ortho_source_id,
- ortho_project_id, ortho_gene_source_id)
-
-
;
+ drop table if exists :SCHEMA.SyntenicPairs;
diff --git a/Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql b/Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql
new file mode 100644
index 0000000000..677049f727
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql
@@ -0,0 +1,12 @@
+
+ create index ot_idx
+ on :SCHEMA.OrthologousTranscripts (source_id, project_id, is_syntenic desc, ortho_source_id,
+ ortho_project_id, ortho_gene_source_id, ortho_product,
+ ortho_name, ortho_organism, ortho_taxon_id, is_reference_strain)
+ ;
+
+ create index ot_smol_idx
+ on :SCHEMA.OrthologousTranscripts (is_syntenic, ortho_taxon_id, source_id, ortho_source_id,
+ ortho_project_id, ortho_gene_source_id)
+ ;
+
diff --git a/Model/lib/psql/webready/global/CompoundAttributes.psql b/Model/lib/psql/webready/global/CompoundAttributes.psql
index 22f54648e9..aa622e1f39 100644
--- a/Model/lib/psql/webready/global/CompoundAttributes.psql
+++ b/Model/lib/psql/webready/global/CompoundAttributes.psql
@@ -9,8 +9,8 @@
, p.secondary_ids
, string_agg(childc.formula, ';' ORDER BY childc.formula) AS formula
, avg(childc.mass::numeric) AS mass
- FROM CompoundProperties p
- , (SELECT id, parent_id, other_names, iupac_name, syn, mass, formula FROM CompoundProperties ) childc
+ FROM :SCHEMA.CompoundProperties p
+ , (SELECT id, parent_id, other_names, iupac_name, syn, mass, formula FROM :SCHEMA.CompoundProperties ) childc
WHERE p.parent_id IS NULL
AND ( p.ID = childc.parent_id OR p.ID = childc.ID )
GROUP BY p.ID, p.source_id, p.compound_name, p.definition, p.secondary_ids
diff --git a/Model/lib/psql/webready/global/CompoundId.psql b/Model/lib/psql/webready/global/CompoundId.psql
index ca4a9c9c60..9cd80c05a2 100644
--- a/Model/lib/psql/webready/global/CompoundId.psql
+++ b/Model/lib/psql/webready/global/CompoundId.psql
@@ -3,7 +3,7 @@
FROM :SCHEMA.CompoundAttributes
UNION
SELECT p.source_id AS id, ca.source_id AS compound, 'child ID' AS type, '' as source
- FROM :SCHEMA.CompoundAttributes ca, CompoundProperties p
+ FROM :SCHEMA.CompoundAttributes ca, :SCHEMA.CompoundProperties p
WHERE ca.id = p.parent_id
UNION
SELECT da.accession_number AS id, p.source_id AS compound, 'KEGG' AS type, '' as source
diff --git a/Model/lib/psql/webready/global/PathwayReactions.psql b/Model/lib/psql/webready/global/PathwayReactions.psql
index 5787e17267..46e140b408 100644
--- a/Model/lib/psql/webready/global/PathwayReactions.psql
+++ b/Model/lib/psql/webready/global/PathwayReactions.psql
@@ -100,7 +100,7 @@
, SRES.ONTOLOGYTERM ot
, rep
, :SCHEMA.PathwayCompounds pc
- LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id
+ LEFT JOIN :SCHEMA.CompoundAttributes ca ON pc.chebi_accession = ca.source_id
WHERE p.PATHWAY_ID = prr.PATHWAY_ID
AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
diff --git a/Model/lib/psql/webready/global/SequenceAttributes.psql b/Model/lib/psql/webready/global/SequenceAttributes.psql
index 4cba9b09fb..77e8aedba8 100644
--- a/Model/lib/psql/webready/global/SequenceAttributes.psql
+++ b/Model/lib/psql/webready/global/SequenceAttributes.psql
@@ -93,39 +93,8 @@
- alter table SequenceAttributes
+ alter table :SCHEMA.SequenceAttributes
add constraint SeqAttrs_pk primary key (full_id)
;
-
-
- create unique index SeqAttrs_idx ON SequenceAttributes (full_id, group_name, taxon_id, source_id)
-
- ;
-
-
-
- create unique index SeqAttrs_gusIdx ON SequenceAttributes (ortholog_group_id, aa_sequence_id)
-
- ;
-
-
-
- create unique index SeqAttrs_idx2 ON SequenceAttributes (group_name, length desc, full_id, taxon_id)
-
- ;
-
-
-
- create unique index SeqAttrs_idx3
- on SequenceAttributes (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id)
-
- ;
-
-
-
- create unique index SeqAttrs_idx4 ON SequenceAttributes (source_id, full_id, group_name, taxon_id)
-
- ;
-
diff --git a/Model/lib/psql/webready/global/SequenceAttributes_ix.psql b/Model/lib/psql/webready/global/SequenceAttributes_ix.psql
new file mode 100644
index 0000000000..88e66acdde
--- /dev/null
+++ b/Model/lib/psql/webready/global/SequenceAttributes_ix.psql
@@ -0,0 +1,31 @@
+
+
+ create unique index SeqAttrs_idx ON :SCHEMA.SequenceAttributes (full_id, group_name, taxon_id, source_id)
+
+ ;
+
+
+
+ create unique index SeqAttrs_gusIdx ON :SCHEMA.SequenceAttributes (ortholog_group_id, aa_sequence_id)
+
+ ;
+
+
+
+ create unique index SeqAttrs_idx2 ON :SCHEMA.SequenceAttributes (group_name, length desc, full_id, taxon_id)
+
+ ;
+
+
+
+ create unique index SeqAttrs_idx3
+ on :SCHEMA.SequenceAttributes (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id)
+
+ ;
+
+
+
+ create unique index SeqAttrs_idx4 ON :SCHEMA.SequenceAttributes (source_id, full_id, group_name, taxon_id)
+
+ ;
+
From 0141db3799eefcc7d389fd4d27845d2c207dd241 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Wed, 21 May 2025 14:17:03 -0400
Subject: [PATCH 033/112] fix index schema
---
Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql | 2 +-
Model/lib/psql/webready/global/CompoundAttributes_ix.psql | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql
index df161ee060..5a938f8616 100644
--- a/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql
+++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql
@@ -1,3 +1,3 @@
- CREATE index AlphaFoldGenes_idx ON AlphaFoldGenes (gene_source_id, uniprot_id)
+ CREATE index AlphaFoldGenes_idx ON :SCHEMA.AlphaFoldGenes (gene_source_id, uniprot_id)
;
diff --git a/Model/lib/psql/webready/global/CompoundAttributes_ix.psql b/Model/lib/psql/webready/global/CompoundAttributes_ix.psql
index 697149da51..2cdd87d86e 100644
--- a/Model/lib/psql/webready/global/CompoundAttributes_ix.psql
+++ b/Model/lib/psql/webready/global/CompoundAttributes_ix.psql
@@ -1,2 +1,2 @@
- CREATE INDEX CompoundAttributes_idx ON CompoundAttributes (source_id)
+ CREATE INDEX CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id)
;
From 7bea7835a1a33228ab20908a66d3cf1e71a8013d Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 10:08:10 -0400
Subject: [PATCH 034/112] optimize compprops and pathreact
---
.../global/CompoundProperties_ix.psql | 2 +
.../webready/global/PathwayReactions.psql | 115 ++++++++++--------
2 files changed, 65 insertions(+), 52 deletions(-)
create mode 100644 Model/lib/psql/webready/global/CompoundProperties_ix.psql
diff --git a/Model/lib/psql/webready/global/CompoundProperties_ix.psql b/Model/lib/psql/webready/global/CompoundProperties_ix.psql
new file mode 100644
index 0000000000..46bd882ae0
--- /dev/null
+++ b/Model/lib/psql/webready/global/CompoundProperties_ix.psql
@@ -0,0 +1,2 @@
+CREATE INDEX CompoundProperties_idx1 ON :SCHEMA.CompoundProperties (id)
+ CREATE INDEX CompoundProperties_idx2 ON :SCHEMA.CompoundProperties (parent_id)
\ No newline at end of file
diff --git a/Model/lib/psql/webready/global/PathwayReactions.psql b/Model/lib/psql/webready/global/PathwayReactions.psql
index 46e140b408..93e6e626c4 100644
--- a/Model/lib/psql/webready/global/PathwayReactions.psql
+++ b/Model/lib/psql/webready/global/PathwayReactions.psql
@@ -1,45 +1,6 @@
- CREATE TABLE :SCHEMA.PathwayReactions AS
- SELECT o.*
- , CASE WHEN o.expasy_url IS NOT NULL THEN '' || o.enzyme || '' ELSE o.enzyme END as expasy_html
- FROM (
- SELECT i.*
- , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-'
- THEN
- 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1='
- || ec.ec_number_1
- || CASE ec.ec_number_2 WHEN null THEN null ELSE chr(38) || 'field2=' || ec.ec_number_2 END
- || CASE ec.ec_number_3 WHEN null THEN null ELSE chr(38) || 'field3=' || ec.ec_number_3 END
- || CASE ec.ec_number_4 WHEN null THEN null ELSE chr(38) || 'field4=' || ec.ec_number_4 END
- ELSE reaction_url END as expasy_url
- , ec.description as enzyme_description
- FROM (
- SELECT
- reaction_id
- , reaction_source_id
- , reaction_url
- , ext_db_name
- , ext_db_version
- , enzyme
- , substrates_html || ' ' || sign || ' ' || products_html as equation_html
- , substrates_text || ' ' || sign || ' ' || products_text as equation_text
- , case when sign = '<=>' then 1 else 0 end as is_reversible
- , substrates_text
- , products_text
- FROM (
- SELECT
- reaction_id
- , reaction_source_id
- , reaction_url
- , ext_db_name
- , ext_db_version
- , enzyme
- , (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like '%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign
- , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html
- , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text
- , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html
- , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text
- FROM (
- WITH rep AS (
+ drop table if exists :SCHEMA.PR_rep;
+
+ create unlogged table :SCHEMA.PR_rep as
SELECT DISTINCT
pr.PATHWAY_REACTION_ID as reaction_id
, pr.SOURCE_ID as reaction_source_id
@@ -65,15 +26,20 @@
AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
AND pc.PATHWAY_ID = p.PATHWAY_ID
AND pc.REACTION_id = pr.PATHWAY_REACTION_ID
- )
+ ;
+
+
+ drop table if exists :SCHEMA.PR_t1;
+
+ create unlogged table :SCHEMA.PR_t1 as
SELECT DISTINCT
pr.PATHWAY_REACTION_ID as reaction_id
, pr.SOURCE_ID as reaction_source_id
, ed.NAME as ext_db_name
, edr.VERSION as ext_db_version
, cast(pn.DISPLAY_LABEL as varchar(20)) as enzyme
- , min(rep.is_reversible) as is_reversible
- , min(rep.type) as type
+ , min(PR_rep.is_reversible) as is_reversible
+ , min(PR_rep.type) as type
, string_agg (pc.type, ',' order by p.pathway_id) as type_list
, coalesce(ca.compound_name, pc.compound_source_id) as compound
, CASE
@@ -98,7 +64,7 @@
, SRES.EXTERNALDATABASE ed
, SRES.EXTERNALDATABASERELEASE edr
, SRES.ONTOLOGYTERM ot
- , rep
+ , :SCHEMA.pr_rep pr_rep
, :SCHEMA.PathwayCompounds pc
LEFT JOIN :SCHEMA.CompoundAttributes ca ON pc.chebi_accession = ca.source_id
WHERE p.PATHWAY_ID = prr.PATHWAY_ID
@@ -112,15 +78,58 @@
AND ed.EXTERNAL_DATABASE_ID = edr.EXTERNAL_DATABASE_ID
AND pc.PATHWAY_ID = p.PATHWAY_ID
AND pc.REACTION_id = pr.PATHWAY_REACTION_ID
- AND rep.reaction_id = pr.pathway_reaction_id
- AND rep.reaction_source_id = pr.source_id
- AND rep.compound = coalesce(ca.compound_name, pc.compound_source_id)
- AND rep.enzyme = pn.DISPLAY_LABEL
- AND rep.is_reversible_og = prel.is_reversible
+ AND PR_rep.reaction_id = pr.pathway_reaction_id
+ AND PR_rep.reaction_source_id = pr.source_id
+ AND PR_rep.compound = coalesce(ca.compound_name, pc.compound_source_id)
+ AND PR_rep.enzyme = pn.DISPLAY_LABEL
+ AND PR_rep.is_reversible_og = prel.is_reversible
GROUP BY pr.pathway_reaction_id, pr.SOURCE_ID, ed.NAME, edr.VERSION, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE
, coalesce(pc.chebi_accession, pc.compound_source_id)
, coalesce(ca.compound_name, pc.compound_source_id)
- ) t1
+ ;
+
+
+ CREATE TABLE :SCHEMA.PathwayReactions AS
+ SELECT o.*
+ , CASE WHEN o.expasy_url IS NOT NULL THEN '' || o.enzyme || '' ELSE o.enzyme END as expasy_html
+ FROM (
+ SELECT i.*
+ , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-'
+ THEN
+ 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1='
+ || ec.ec_number_1
+ || CASE ec.ec_number_2 WHEN null THEN null ELSE chr(38) || 'field2=' || ec.ec_number_2 END
+ || CASE ec.ec_number_3 WHEN null THEN null ELSE chr(38) || 'field3=' || ec.ec_number_3 END
+ || CASE ec.ec_number_4 WHEN null THEN null ELSE chr(38) || 'field4=' || ec.ec_number_4 END
+ ELSE reaction_url END as expasy_url
+ , ec.description as enzyme_description
+ FROM (
+ SELECT
+ reaction_id
+ , reaction_source_id
+ , reaction_url
+ , ext_db_name
+ , ext_db_version
+ , enzyme
+ , substrates_html || ' ' || sign || ' ' || products_html as equation_html
+ , substrates_text || ' ' || sign || ' ' || products_text as equation_text
+ , case when sign = '<=>' then 1 else 0 end as is_reversible
+ , substrates_text
+ , products_text
+ FROM (
+ SELECT
+ reaction_id
+ , reaction_source_id
+ , reaction_url
+ , ext_db_name
+ , ext_db_version
+ , enzyme
+ , (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like '%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign
+ , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html
+ , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text
+ , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html
+ , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text
+ FROM :SCHEMA.PR_t1 as t1
GROUP BY reaction_id, reaction_source_id, reaction_url, ext_db_name, ext_db_version, enzyme, is_reversible
) t2
) i
@@ -129,3 +138,5 @@
;
+ drop table if exists :SCHEMA.PR_rep;
+ drop table if exists :SCHEMA.PR_t1;
From 74d57189922f8ebc40d587786228536237708d6e Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 12:27:54 -0400
Subject: [PATCH 035/112] fix missing :SCHEMA
---
Model/lib/psql/webready/global/PathwayNodes.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql
index eef89059da..20b181f77c 100644
--- a/Model/lib/psql/webready/global/PathwayNodes.psql
+++ b/Model/lib/psql/webready/global/PathwayNodes.psql
@@ -126,7 +126,7 @@
SELECT string_agg(io, ',' ORDER BY io) AS all_edges
, e_id
, pathway_id
- FROM EnzymeEdges
+ FROM :SCHEMA.EnzymeEdges
GROUP BY pathway_id
, e_id
)
From 30e58841fa3193f4d0501cda09f8d2d78143857c Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 12:44:49 -0400
Subject: [PATCH 036/112] fix typos
---
Model/lib/psql/webready/global/CompoundProperties_ix.psql | 5 +++--
Model/lib/psql/webready/global/PathwayAttributes_ix.psql | 2 +-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/Model/lib/psql/webready/global/CompoundProperties_ix.psql b/Model/lib/psql/webready/global/CompoundProperties_ix.psql
index 46bd882ae0..5943ec2260 100644
--- a/Model/lib/psql/webready/global/CompoundProperties_ix.psql
+++ b/Model/lib/psql/webready/global/CompoundProperties_ix.psql
@@ -1,2 +1,3 @@
-CREATE INDEX CompoundProperties_idx1 ON :SCHEMA.CompoundProperties (id)
- CREATE INDEX CompoundProperties_idx2 ON :SCHEMA.CompoundProperties (parent_id)
\ No newline at end of file
+CREATE INDEX CompoundProperties_idx1 ON :SCHEMA.CompoundProperties (id);
+
+ CREATE INDEX CompoundProperties_idx2 ON :SCHEMA.CompoundProperties (parent_id);
\ No newline at end of file
diff --git a/Model/lib/psql/webready/global/PathwayAttributes_ix.psql b/Model/lib/psql/webready/global/PathwayAttributes_ix.psql
index a5c4902674..492ee68829 100644
--- a/Model/lib/psql/webready/global/PathwayAttributes_ix.psql
+++ b/Model/lib/psql/webready/global/PathwayAttributes_ix.psql
@@ -1,4 +1,4 @@
- CREATE UNIQUE PathAttr_sourceId_pwaySrc
+ CREATE UNIQUE index PathAttr_sourceId_pwaySrc
ON :SCHEMA.PathwayAttributes (source_id, pathway_source)
;
From 48d2470daa966ca221e6364521bc73f17c7dba45 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 12:53:59 -0400
Subject: [PATCH 037/112] PathwayNodes: drop :SCHEMA prefix from AllEnzymeEdges reference
---
Model/lib/psql/webready/global/PathwayNodes.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql
index 20b181f77c..406129eefb 100644
--- a/Model/lib/psql/webready/global/PathwayNodes.psql
+++ b/Model/lib/psql/webready/global/PathwayNodes.psql
@@ -134,7 +134,7 @@
SELECT pathway_id
, all_edges
, string_agg(e_id::varchar, '_' ORDER BY e_id) AS parent
- FROM :SCHEMA.AllEnzymeEdges
+ FROM AllEnzymeEdges
GROUP BY pathway_id
, all_edges
HAVING COUNT (*) > 1
From 36184a875d4ca15af061044d2b5ec4cb02fd1b44 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 12:58:39 -0400
Subject: [PATCH 038/112] PathwayNodes: qualify PathwayAttributes with :SCHEMA
---
Model/lib/psql/webready/global/PathwayNodes.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql
index 406129eefb..6510bb9735 100644
--- a/Model/lib/psql/webready/global/PathwayNodes.psql
+++ b/Model/lib/psql/webready/global/PathwayNodes.psql
@@ -228,7 +228,7 @@
AND ot1.name != 'enzyme'
AND ot2.name != 'enzyme'
) rel
- , PathwayAttributes pa
+ , :SCHEMA.PathwayAttributes pa
WHERE pa.pathway_id = rel.pathway_id
;
From a4dbadb5d08d407153f8e00ff17929839c3b5f77 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 13:04:16 -0400
Subject: [PATCH 039/112] PathwayNodes: use "drop table if exists" and remove duplicate ParentsForEdges drop
---
Model/lib/psql/webready/global/PathwayNodes.psql | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql
index 6510bb9735..bcd0979283 100644
--- a/Model/lib/psql/webready/global/PathwayNodes.psql
+++ b/Model/lib/psql/webready/global/PathwayNodes.psql
@@ -288,14 +288,14 @@
LEFT JOIN :SCHEMA.NodesWithTypes pn ON nodes_with_parents.pathway_node_id = pn.pathway_node_id::varchar
;
-drop table :SCHEMA.NodesWithTypes;
-drop table :SCHEMA.ReactionsWithReversibility;
-drop table :SCHEMA.EnzymeEdges;
-drop table :SCHEMA.ParentNodes;
-drop table :SCHEMA.NodesWithParents;
-drop table :SCHEMA.EnzymeReactions;
-drop table :SCHEMA.ParentsForEdges;
-drop table :SCHEMA.ParentsForEdges;
+drop table if exists :SCHEMA.NodesWithTypes;
+drop table if exists :SCHEMA.ReactionsWithReversibility;
+drop table if exists :SCHEMA.EnzymeEdges;
+drop table if exists :SCHEMA.ParentNodes;
+drop table if exists :SCHEMA.NodesWithParents;
+drop table if exists :SCHEMA.EnzymeReactions;
+drop table if exists :SCHEMA.ParentsForEdges;
+
From 40543879bdf879df78a0c3aae6851b0bc50c2bd7 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 13:14:14 -0400
Subject: [PATCH 040/112] missing taxon.
---
Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
index d00a1a5e8d..29964ad4b5 100644
--- a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
@@ -25,7 +25,7 @@
1 as is_top_level,
sequence.na_sequence_id, organism.genome_source,
organism.name_for_filenames, coalesce(msa.has_msa, 0) as has_msa
- FROM sres.Taxon LEFT JOIN apidb.Organism ON taxon_id = :TAXON_ID and taxon.taxon_id = organism.taxon_id,
+ FROM sres.Taxon LEFT JOIN apidb.Organism ON taxon.taxon_id = :TAXON_ID and taxon.taxon_id = organism.taxon_id,
sres.OntologyTerm so,
( SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description,
a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id
From 7e684ca1717a728f7c41be0e3c13f991d9282cba Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 14:03:38 -0400
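Subject: [PATCH 041/112] debug: GoTermSummary WHERE -> AND, unqualified IntronUtrCoords index names, PANIO stray comma before JOIN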
---
Model/lib/psql/webready/orgSpecific/GoTermSummary.psql | 2 +-
Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql | 4 ++--
Model/lib/psql/webready/orgSpecific/PANIO.psql | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql
index a233539f0c..3d2fb4507f 100644
--- a/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql
+++ b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql
@@ -49,7 +49,7 @@
AND edr.external_database_release_id = ot.external_database_release_id
AND edr.external_database_id = ed.external_database_id
AND ed.name ='GO_RSRC'
- WHERE ggt.org_abbrev = ':ORG_ABBREV'
+ AND ggt.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql
index c1359297b9..78bff1d74b 100644
--- a/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql
@@ -1,8 +1,8 @@
- CREATE INDEX :SCHEMA.iuc_srcid_ix
+ CREATE INDEX iuc_srcid_ix
ON :SCHEMA.IntronUtrCoords (source_id, na_feature_id)
;
- CREATE INDEX :SCHEMA.iuc_nfid_ix
+ CREATE INDEX iuc_nfid_ix
ON :SCHEMA.IntronUtrCoords (na_feature_id, source_id)
;
diff --git a/Model/lib/psql/webready/orgSpecific/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql
index c7afcbeca5..7eefd21f03 100644
--- a/Model/lib/psql/webready/orgSpecific/PANIO.psql
+++ b/Model/lib/psql/webready/orgSpecific/PANIO.psql
@@ -9,7 +9,7 @@
out_type.source_id as output_pan_type_source_id,
--out_type.name as output_pan_type,
out_type.ontology_term_id as output_pan_type_id
- FROM :SCHEMA.panextdbrls panExtDbRls,
+ FROM :SCHEMA.panextdbrls panExtDbRls
INNER JOIN study.Input i on i.protocol_app_node_id = panExtDbRls.pan_id
INNER JOIN study.ProtocolApp pa on i.protocol_app_id = pa.protocol_app_id
INNER JOIN study.Output o AND o.protocol_app_id = pa.protocol_app_id
From a94bc2db90808b9e3f246253b2caab643726f628 Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Thu, 22 May 2025 14:40:26 -0400
Subject: [PATCH 042/112] est tables to use is_reference and update notes
---
.../webready/orgSpecific/EstAttributes.psql | 26 +++++++++++------
.../webready/orgSpecific/EstSequence.psql | 29 +++++++++++--------
Model/lib/xml/tuningManager/webtables.org | 1 +
3 files changed, 35 insertions(+), 21 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
index 17da985e7f..c777a3a288 100644
--- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
@@ -1,9 +1,8 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE EstAttributes AS
SELECT
- cast(apidb.project_id(tn.name) as varchar(20)) as project_id,
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
ens.source_id,
e.seq_primer AS primer,
ens.a_count,
@@ -26,13 +25,21 @@
l.library_id, replace(l.dbest_name, '''', '-') as library_dbest_name
FROM dots.Est e, dots.Library l, sres.Taxon, sres.OntologyTerm oterm,
sres.TaxonName tn, sres.ExternalDatabase ed,
- sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens
+ sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens,
+ apidb.datasource ds, apidb.organism o
LEFT JOIN
(select query_na_sequence_id,max(ct) as best_alignment_count
from (
SELECT query_na_sequence_id, COUNT(*) AS ct
- FROM dots.BlatAlignment ba
+ FROM dots.BlatAlignment ba, apidb.datasource, apidb.organism o,
+ sres.externaldatabase d, sres.externaldatabaserelease r
WHERE is_best_alignment = 1
+ AND ba.query_external_db_release_id = r.external_database_release_id
+ AND r.external_database_id = d.external_database_id
+ AND ed.name = ds.name
+ AND ds.taxon_id = o.taxon_id
+ AND o.is_reference = 1
+ AND o.taxon_id = :TAXON_ID
GROUP BY target_external_db_release_id,query_na_sequence_id) t
group by query_na_sequence_id
) best ON ens.na_sequence_id = best.query_na_sequence_id
@@ -44,8 +51,9 @@
AND ens.external_database_release_id = edr.external_database_release_id
AND edr.external_database_id = ed.external_database_id
AND ens.sequence_ontology_id = oterm.ontology_term_id
+ AND ed.name = ds.name
+ and ds.taxon_id = o.taxon_id
+ and o.is_reference = 1
+ and o.taxon_id = :TAXON_ID
AND oterm.name = 'EST'
-
-
:DECLARE_PARTITION;
-
diff --git a/Model/lib/psql/webready/orgSpecific/EstSequence.psql b/Model/lib/psql/webready/orgSpecific/EstSequence.psql
index 9dc3effb71..e31eeca39c 100644
--- a/Model/lib/psql/webready/orgSpecific/EstSequence.psql
+++ b/Model/lib/psql/webready/orgSpecific/EstSequence.psql
@@ -1,16 +1,21 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE EstSequence AS
- SELECT ens.source_id,
- cast(apidb.project_id(tn.name) as varchar(20)) as project_id,
- ens.sequence
- FROM dots.ExternalNaSequence ens, sres.OntologyTerm oterm, sres.TaxonName tn
+ SELECT ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ , ens.source_id
+ , ens.sequence
+ FROM dots.ExternalNaSequence ens
+ , sres.OntologyTerm oterm
+ , apidb.organism o
+ , sres.externaldatabase d
+ , sres.externaldatabaserelease r
+ , apidb.datasource ds
WHERE oterm.name = 'EST'
AND oterm.ontology_term_id = ens.sequence_ontology_id
- AND ens.taxon_id = tn.taxon_id
- AND tn.name_class = 'scientific name'
-
-
+ AND ens.external_database_release_id = r.external_database_release_id
+ AND r.external_database_id = d.external_database_id
+ AND d.name = ds.name
+ AND ds.taxon_id = o.taxon_id
+ AND ds.taxon_id = :TAXON_ID
+ AND o.is_reference = 1
:DECLARE_PARTITION;
-
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index fa0ecc2bde..0c80030b59 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -23,6 +23,7 @@
- [X] SequencePieceClosure
- [X] GenomicSeqAttributes
- [s] SequenceEnzymeClass
+ - Temp remove this and eventually Move to ComparativeGenomics because it depends on the OrthoMCL Derived EC Numbers
- Transcript / Protein
- [X] SignalPeptideDomains_ix.psql
From 4005884f2e6ba056cda44cb8910d1425459ffc81 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 14:57:47 -0400
Subject: [PATCH 043/112] debug: add project_id, org_abbrev, modification_date columns to GoTermSummary union branch and PANIO
---
Model/lib/psql/webready/orgSpecific/GoTermSummary.psql | 3 ++-
Model/lib/psql/webready/orgSpecific/PANIO.psql | 4 ++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql
index 3d2fb4507f..db5e946533 100644
--- a/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql
+++ b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql
@@ -21,7 +21,8 @@
) gs ON ggt.go_term_id = gs.ontology_term_id
WHERE ggt.org_abbrev = ':ORG_ABBREV'
UNION
- SELECT ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
+ SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
ggt.taxon_id, ggt.is_not,
replace (substr(ot.source_id, 1, 25),'_',':') as go_id,
ot.ontology_term_id as go_term_id, ggt.ontology,
diff --git a/Model/lib/psql/webready/orgSpecific/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql
index 7eefd21f03..ac61b072fa 100644
--- a/Model/lib/psql/webready/orgSpecific/PANIO.psql
+++ b/Model/lib/psql/webready/orgSpecific/PANIO.psql
@@ -1,5 +1,5 @@
:CREATE_AND_POPULATE
- SELECT DISTINCT io.*
+ SELECT DISTINCT io.*, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
FROM (
SELECT i.protocol_app_node_id input_pan_id, pa.protocol_app_id,
o.protocol_app_node_id output_pan_id,
@@ -12,7 +12,7 @@
FROM :SCHEMA.panextdbrls panExtDbRls
INNER JOIN study.Input i on i.protocol_app_node_id = panExtDbRls.pan_id
INNER JOIN study.ProtocolApp pa on i.protocol_app_id = pa.protocol_app_id
- INNER JOIN study.Output o AND o.protocol_app_id = pa.protocol_app_id
+ INNER JOIN study.Output o o.protocol_app_id = pa.protocol_app_id
INNER JOIN study.ProtocolAppNode in_pan on i.protocol_app_node_id = in_pan.protocol_app_node_id
INNER JOIN study.ProtocolAppNode out_pan on o.protocol_app_node_id = out_pan.protocol_app_node_id
LEFT JOIN sres.OntologyTerm out_type ON out_pan.type_id = out_type.ontology_term_id
From 33b8248cda983a056c5496134c4c2d3d2cfbc844 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 15:09:25 -0400
Subject: [PATCH 044/112] PANIO: remove trailing comma from select list
---
Model/lib/psql/webready/orgSpecific/PANIO.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql
index ac61b072fa..f4824b6f27 100644
--- a/Model/lib/psql/webready/orgSpecific/PANIO.psql
+++ b/Model/lib/psql/webready/orgSpecific/PANIO.psql
@@ -1,5 +1,5 @@
:CREATE_AND_POPULATE
- SELECT DISTINCT io.*, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date,
+ SELECT DISTINCT io.*, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM (
SELECT i.protocol_app_node_id input_pan_id, pa.protocol_app_id,
o.protocol_app_node_id output_pan_id,
From 9d49eb15a523e94645f1a4ed62461bd72f672447 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 15:10:52 -0400
Subject: [PATCH 045/112] GenomicSeqAttributes_ix: add missing "index" keyword
---
.../webready/orgSpecific/GenomicSeqAttributes_ix.psql | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql
index 039749930e..54f8016119 100644
--- a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql
@@ -1,11 +1,11 @@
- create unique pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id)
+ create unique index pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id)
;
- create unique SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id)
+ create unique index SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id)
;
- create unique SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id)
+ create unique index SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id)
;
- create unique SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id)
+ create unique index SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id)
;
From 97f480cc89eff79398cc74e7dd74867fda7520c2 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 15:22:38 -0400
Subject: [PATCH 046/112] GoTermSummary_ix: add missing "index" keyword; PANIO: add missing "on" in Output join
---
Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql | 4 ++--
Model/lib/psql/webready/orgSpecific/PANIO.psql | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql
index 8c5134b475..2d0531bdfa 100644
--- a/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql
@@ -1,7 +1,7 @@
- create GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source)
+ create index GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source)
;
- create GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary
+ create index GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary
(ontology, gene_source_id, is_not, is_go_slim,
go_id, go_term_name, evidence_code, evidence_category)
;
diff --git a/Model/lib/psql/webready/orgSpecific/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql
index f4824b6f27..b9b918779f 100644
--- a/Model/lib/psql/webready/orgSpecific/PANIO.psql
+++ b/Model/lib/psql/webready/orgSpecific/PANIO.psql
@@ -12,7 +12,7 @@
FROM :SCHEMA.panextdbrls panExtDbRls
INNER JOIN study.Input i on i.protocol_app_node_id = panExtDbRls.pan_id
INNER JOIN study.ProtocolApp pa on i.protocol_app_id = pa.protocol_app_id
- INNER JOIN study.Output o o.protocol_app_id = pa.protocol_app_id
+ INNER JOIN study.Output o on o.protocol_app_id = pa.protocol_app_id
INNER JOIN study.ProtocolAppNode in_pan on i.protocol_app_node_id = in_pan.protocol_app_node_id
INNER JOIN study.ProtocolAppNode out_pan on o.protocol_app_node_id = out_pan.protocol_app_node_id
LEFT JOIN sres.OntologyTerm out_type ON out_pan.type_id = out_type.ontology_term_id
From dc1f03886c72e2ad748ebc5bca634aba3eb01eb2 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 15:40:34 -0400
Subject: [PATCH 047/112] GenomicSeqAttributes_ix: add org_abbrev as leading column of unique indexes
---
.../webready/orgSpecific/GenomicSeqAttributes_ix.psql | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql
index 54f8016119..de455dd7f4 100644
--- a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql
@@ -1,11 +1,11 @@
- create unique index pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id)
+ create unique index pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (org_abbrev, lower(source_id), project_id)
;
- create unique index SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id)
+ create unique index SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (org_abbrev, source_id)
;
- create unique index SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id)
+ create unique index SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (org_abbrev, na_sequence_id)
;
- create unique index SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id)
+ create unique index SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (org_abbrev, taxon_id, source_id)
;
From f03dad2e45bb812e634374ade439074362613e4d Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 15:43:06 -0400
Subject: [PATCH 048/112] EstAttributes_ix: add org_abbrev to unique source_id index
---
Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql
index eed35a9960..3cad24bdcc 100644
--- a/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql
@@ -1,6 +1,6 @@
- create unique index EstAttr_source_id ON :SCHEMA.EstAttributes (source_id)
+ create unique index EstAttr_source_id ON :SCHEMA.EstAttributes (org_abbrev, source_id)
;
From 9cbcbc9ec36c30e0825792a3c46b2cdeb4b519a2 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 15:54:21 -0400
Subject: [PATCH 049/112] EstAttributes: use subquery alias d (not ed) for externaldatabase name join
---
Model/lib/psql/webready/orgSpecific/EstAttributes.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
index c777a3a288..5550ae5d67 100644
--- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
@@ -36,7 +36,7 @@
WHERE is_best_alignment = 1
AND ba.query_external_db_release_id = r.external_database_release_id
AND r.external_database_id = d.external_database_id
- AND ed.name = ds.name
+ AND d.name = ds.name
AND ds.taxon_id = o.taxon_id
AND o.is_reference = 1
AND o.taxon_id = :TAXON_ID
From affcf6d652cb24a15259da1d0947ba965737f818 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 16:10:42 -0400
Subject: [PATCH 050/112] debug indexes
---
.../orgSpecific/ChrCopyNumbers_ix.psql | 4 ++--
.../orgSpecific/GeneAttributes_ix.psql | 24 +++++++++----------
.../webready/orgSpecific/GeneGoTable_ix.psql | 2 +-
.../orgSpecific/GeneIntJuncStats_ix.psql | 2 +-
.../orgSpecific/GeneIntronJunction_ix.psql | 6 ++---
.../orgSpecific/GeneModelDump_ix.psql | 2 +-
.../GenomicSequenceSequence_ix.psql | 2 +-
.../orgSpecific/NameMappingGIJ_ix.psql | 2 +-
.../psql/webready/orgSpecific/PANIO_ix.psql | 8 +++----
.../orgSpecific/PathwaysGeneTable_ix.psql | 2 +-
.../orgSpecific/TranscriptAttributes_ix.psql | 22 ++++++++---------
11 files changed, 38 insertions(+), 38 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql
index 4f989590a9..c5a7eff545 100644
--- a/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql
@@ -1,9 +1,9 @@
- CREATE ChrCN_ix
+ CREATE index ChrCN_ix
ON :SCHEMA.ChrCopyNumbers (input_pan_id, na_sequence_id)
;
- CREATE ChrCN_output
+ CREATE index ChrCN_output
ON :SCHEMA.ChrCopyNumbers (output_pan_id)
;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql
index 9f45a0abad..2e66f521e6 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql
@@ -1,47 +1,47 @@
- CREATE UNIQUE GeneAttr_srcPrj
- ON :SCHEMA.GeneAttributes (source_id)
+ CREATE UNIQUE INDEX GeneAttr_srcPrj
+ ON :SCHEMA.GeneAttributes (org_abbrev, source_id)
;
- CREATE GeneAttr_exon_ix
+ CREATE INDEX GeneAttr_exon_ix
ON :SCHEMA.GeneAttributes (exon_count, source_id, project_id)
;
- CREATE GeneAttr_loc_ix
+ CREATE INDEX GeneAttr_loc_ix
ON :SCHEMA.GeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated)
;
- CREATE GeneAttr_feat_ix
+ CREATE INDEX GeneAttr_feat_ix
ON :SCHEMA.GeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed)
;
- CREATE GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes (
+ CREATE INDEX GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes (
orthomcl_name, source_id, taxon_id, gene_type, na_feature_id,
na_sequence_id, start_min, end_max, organism, species,
product, project_id
)
;
- CREATE GeneAttr_ortholog_ix
+ CREATE INDEX GeneAttr_ortholog_ix
ON :SCHEMA.GeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id)
;
- CREATE GeneAttr_orgsrc_ix
+ CREATE INDEX GeneAttr_orgsrc_ix
ON :SCHEMA.GeneAttributes (organism, source_id, na_sequence_id, start_min, end_max)
;
- CREATE GeneAttr_prjsrc_ix
+ CREATE INDEX GeneAttr_prjsrc_ix
ON :SCHEMA.GeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0))
;
- CREATE GeneAttr_txid_ix
+ CREATE INDEX GeneAttr_txid_ix
ON :SCHEMA.GeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id)
;
- CREATE GeneAttr_ids_ix
+ CREATE INDEX GeneAttr_ids_ix
ON :SCHEMA.GeneAttributes (na_feature_id, source_id, project_id)
;
- CREATE GeneAttr_loc_intjunc_ix
+ CREATE INDEX GeneAttr_loc_intjunc_ix
ON :SCHEMA.GeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX)
;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql
index a065517030..5e34d52885 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql
@@ -1,4 +1,4 @@
- create ggtab_ix ON :SCHEMA.GeneGoTable
+ create index ggtab_ix ON :SCHEMA.GeneGoTable
(source_id, project_id, go_id, transcript_ids, is_not, go_term_name,
ontology, source, evidence_code, reference, evidence_code_parameter, sort_key)
;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql
index 89389667a0..89d3004344 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql
@@ -1,3 +1,3 @@
- create GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id)
+ create index GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id)
;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql
index 3c0cff5069..ad0f7593c5 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql
@@ -1,10 +1,10 @@
- create gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
+ create index gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
;
- create gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id)
+ create index gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id)
;
- create gijnew_txnloc_ix
+ create index gijnew_txnloc_ix
on :SCHEMA.GeneIntronJunction
(taxon_id, na_sequence_id, segment_start, segment_end, is_reversed,
total_unique, total_isrpm, annotated_intron)
diff --git a/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql
index e4388deb01..2a73000f95 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql
@@ -1,4 +1,4 @@
- create gmd_ix
+ create index gmd_ix
on :SCHEMA.GeneModelDump
(source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids)
;
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql
index 497278e368..dc3711740d 100644
--- a/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql
@@ -1,3 +1,3 @@
- create GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id)
+ create index GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql
index 4245fce828..c386e6ba00 100644
--- a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql
@@ -1,3 +1,3 @@
- create namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id)
+ create index namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id)
;
diff --git a/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql b/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql
index 9bbb34ec21..f809a0b722 100644
--- a/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql
@@ -1,4 +1,4 @@
- create painio2_iix on :SCHEMA.PANIO
+ create index painio2_iix on :SCHEMA.PANIO
(input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
@@ -6,7 +6,7 @@
- create painio2_oix on :SCHEMA.PANIO
+ create index painio2_oix on :SCHEMA.PANIO
(output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
@@ -14,7 +14,7 @@
- create painio2_otypeix on :SCHEMA.PANIO
+ create index painio2_otypeix on :SCHEMA.PANIO
(output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id)
@@ -22,7 +22,7 @@
- create painio2_itypeix on :SCHEMA.PANIO
+ create index painio2_itypeix on :SCHEMA.PANIO
(input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id)
diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql
index 40750a7f84..f69349df0c 100644
--- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql
@@ -1,4 +1,4 @@
- create pgt_ix on :SCHEMA.PathwaysGeneTable
+ create index pgt_ix on :SCHEMA.PathwaysGeneTable
(gene_source_id, project_id, pathway_source_id, pathway_name,
reactions, enzyme, expasy_url, pathway_source, exact_match)
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql
index dd1670b4a3..d0eee715fd 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql
@@ -1,39 +1,39 @@
CREATE UNIQUE INDEX TranscriptAttr_sourceId
- ON :SCHEMA.TranscriptAttributes (source_id)
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, source_id)
;
CREATE UNIQUE INDEX TranscriptAttr_srcPrj
- ON :SCHEMA.TranscriptAttributes (source_id, gene_source_id, project_id)
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, source_id, gene_source_id, project_id)
;
CREATE UNIQUE INDEX TranscriptAttr_genesrc
- ON :SCHEMA.TranscriptAttributes (gene_source_id, source_id, project_id)
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_source_id, source_id, project_id)
;
CREATE UNIQUE INDEX TranscriptAttr_exon_ix
- ON :SCHEMA.TranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id)
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_exon_count, source_id, gene_source_id, project_id)
;
CREATE UNIQUE INDEX TranscriptAttr_loc_ix
ON :SCHEMA.TranscriptAttributes
- (na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id,
+ (org_abbrev, na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id,
is_deprecated, source_id, gene_source_id, project_id)
;
CREATE UNIQUE INDEX TranscriptAttr_feat_ix
- ON :SCHEMA.TranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id)
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, na_feature_id, source_id, gene_source_id, project_id)
;
CREATE UNIQUE INDEX TranscriptAttr_geneid_ix
- ON :SCHEMA.TranscriptAttributes (gene_id, source_id, gene_source_id, project_id)
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_id, source_id, gene_source_id, project_id)
;
CREATE UNIQUE INDEX TransAttr_orthoname_ix
- ON :SCHEMA.TranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id)
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id)
;
CREATE UNIQUE INDEX TransAttr_molwt_ix
- ON :SCHEMA.TranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id)
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, taxon_id, molecular_weight, source_id, gene_source_id, project_id)
;
CREATE INDEX TransAttr_ortholog_ix
@@ -55,13 +55,13 @@
CREATE UNIQUE INDEX TrnscrptAttr_geneinfo
ON :SCHEMA.TranscriptAttributes
- (gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id,
+ (org_abbrev, gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id,
protein_source_id, na_sequence_id, length, protein_length,
five_prime_utr_length, three_prime_utr_length)
;
CREATE UNIQUE INDEX TranscriptAttr_genenaf
- ON :SCHEMA.TranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id)
+ ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_na_feature_id, gene_source_id, source_id, project_id)
;
CREATE INDEX TransAttr_locsIds_ix
From 73a0c35c454bb263bf433257fb4fab0e779f41f7 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 16:14:04 -0400
Subject: [PATCH 051/112] debug indexes
---
Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
index a5cfad9f82..4968f458a8 100644
--- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
@@ -80,7 +80,7 @@
- create index :SCHEMA.:ORG_ABBREVProteinGoAttr_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id)
+ create index ProteinGoAttr_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id)
;
From 6b97ff9875de545c034c99ddf37809762234a4b0 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 16:38:57 -0400
Subject: [PATCH 052/112] debug indexes
---
Model/lib/psql/webready/orgSpecific/EstAttributes.psql | 6 +++---
Model/lib/psql/webready/orgSpecific/EstSequence.psql | 2 +-
Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql | 2 +-
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
index 5550ae5d67..110b43a673 100644
--- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
@@ -31,14 +31,14 @@
(select query_na_sequence_id,max(ct) as best_alignment_count
from (
SELECT query_na_sequence_id, COUNT(*) AS ct
- FROM dots.BlatAlignment ba, apidb.datasource, apidb.organism o,
+ FROM dots.BlatAlignment ba, apidb.datasource ds, apidb.organism o,
sres.externaldatabase d, sres.externaldatabaserelease r
WHERE is_best_alignment = 1
AND ba.query_external_db_release_id = r.external_database_release_id
AND r.external_database_id = d.external_database_id
AND d.name = ds.name
AND ds.taxon_id = o.taxon_id
- AND o.is_reference = 1
+ AND o.is_reference_strain = 1
AND o.taxon_id = :TAXON_ID
GROUP BY target_external_db_release_id,query_na_sequence_id) t
group by query_na_sequence_id
@@ -53,7 +53,7 @@
AND ens.sequence_ontology_id = oterm.ontology_term_id
AND ed.name = ds.name
and ds.taxon_id = o.taxon_id
- and o.is_reference = 1
+ and o.is_reference_strain = 1
and o.taxon_id = :TAXON_ID
AND oterm.name = 'EST'
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/EstSequence.psql b/Model/lib/psql/webready/orgSpecific/EstSequence.psql
index e31eeca39c..58bc1dfd95 100644
--- a/Model/lib/psql/webready/orgSpecific/EstSequence.psql
+++ b/Model/lib/psql/webready/orgSpecific/EstSequence.psql
@@ -17,5 +17,5 @@
AND d.name = ds.name
AND ds.taxon_id = o.taxon_id
AND ds.taxon_id = :TAXON_ID
- AND o.is_reference = 1
+ AND o.is_reference_strain = 1
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
index 4968f458a8..52bb9d668e 100644
--- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
@@ -158,7 +158,7 @@
row_number() over (partition by t.source_id order by tas.length desc) as rank_in_transcript,
uniprot.uniprot_ids
FROM
- dots.Transcript t,
+ dots.Transcript t
INNER JOIN dots.GeneFeature gf ON gf.na_feature_id = t.parent_id
INNER JOIN dots.nasequence nas ON gf.na_sequence_id = nas.na_sequence_id AND nas.taxon_id = :TAXON_ID
INNER JOIN dots.TranslatedAaFeature taf ON t.na_feature_id = taf.na_feature_id
From 61b40b1ed43e8a0e383d13487601a50fa3b9dbe6 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 16:59:57 -0400
Subject: [PATCH 053/112] debug
---
Model/lib/psql/webready/orgSpecific/EstAttributes.psql | 4 ++--
Model/lib/psql/webready/orgSpecific/GeneId_ix.psql | 10 +++++-----
.../psql/webready/orgSpecific/ProteinAttributes.psql | 8 ++++----
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
index 110b43a673..4564bd2631 100644
--- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql
@@ -25,8 +25,8 @@
l.library_id, replace(l.dbest_name, '''', '-') as library_dbest_name
FROM dots.Est e, dots.Library l, sres.Taxon, sres.OntologyTerm oterm,
sres.TaxonName tn, sres.ExternalDatabase ed,
- sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens,
- apidb.datasource ds, apidb.organism o
+ apidb.datasource ds, apidb.organism o,
+ sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens
LEFT JOIN
(select query_na_sequence_id,max(ct) as best_alignment_count
from (
diff --git a/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql
index 2f2fd86202..5e3f12dce7 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql
@@ -1,15 +1,15 @@
- CREATE INDEX :SCHEMA.GeneId_gene_idx ON :SCHEMA.GeneId (gene, id)
+ CREATE INDEX GeneId_gene_idx ON :SCHEMA.GeneId (gene, id)
;
- CREATE INDEX :SCHEMA.GeneId_id_idx ON :SCHEMA.GeneId (id, gene)
+ CREATE INDEX GeneId_id_idx ON :SCHEMA.GeneId (id, gene)
;
- CREATE INDEX :SCHEMA.GeneId_uniqid_idx ON :SCHEMA.GeneId (unique_mapping, id, gene)
+ CREATE INDEX GeneId_uniqid_idx ON :SCHEMA.GeneId (unique_mapping, id, gene)
;
- CREATE INDEX :SCHEMA.GeneId_lowid_idx ON :SCHEMA.GeneId (lower(id), gene)
+ CREATE INDEX GeneId_lowid_idx ON :SCHEMA.GeneId (lower(id), gene)
;
- CREATE INDEX :SCHEMA.GeneId_uniqlowid_idx ON :SCHEMA.GeneId (unique_mapping, lower(id), gene)
+ CREATE INDEX GeneId_uniqlowid_idx ON :SCHEMA.GeneId (unique_mapping, lower(id), gene)
;
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
index 52bb9d668e..1f0e0f78e9 100644
--- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
@@ -85,9 +85,9 @@
;
- DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp;
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp;
- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp AS
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp AS
SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers
FROM (SELECT DISTINCT asec.aa_sequence_id,
ec.ec_number || ' (' || ec.description || ')' AS ec_number
@@ -101,10 +101,10 @@
;
- DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp;
+ DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp;
--TODO: these rows will not exist in org specific land
- -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp AS
+ -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp AS
-- SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived
-- FROM (SELECT DISTINCT asec.aa_sequence_id,
-- ec.ec_number || ' (' || ec.description || ')' AS ec_number
From 6dacc1cf6167d3d7739c6cc01ef00abf7e9b7d11 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 17:03:50 -0400
Subject: [PATCH 054/112] debug
---
.../webready/orgSpecific/NameMappingGIJ.psql | 309 +++++++++++++++++-
1 file changed, 307 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
index 678512b373..8ac689abd8 100644
--- a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
+++ b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
@@ -4,7 +4,7 @@
WITH ij AS (
SELECT pj.output_pan_id as junctions_pan_id, p.output_pan_id as expression_pan_id, avg(nafe.value) as avg_value,pan.name as exp_name,
regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name
- FROM panio p, panio pj, results.nafeatureexpression nafe, study.protocolappnode pan
+ FROM :SCHEMA.:ORG_ABBREVpanio p, :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
AND pj.input_pan_id = p.input_pan_id
AND p.output_pan_id = pan.protocol_app_node_id
@@ -65,7 +65,312 @@
cb.value::NUMERIC as number_mapped_reads,
cc.value::NUMERIC as avg_mapping_coverage
FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
- PANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb,
+ :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
+ WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
+ AND pj.input_pan_id = p.input_pan_id
+ AND p.output_pan_id = pan.protocol_app_node_id
+ AND pan.name like '%tpm - unique%'
+ AND p.output_pan_id = nafe.protocol_app_node_id
+ GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
+ ORDER BY pj.output_pan_id
+ ) , stats AS (
+ SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
+ sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
+ FROM apidb.IntronJunction
+ WHERE unique_reads >= 1
+ GROUP BY protocol_app_node_id
+ ), part AS (
+ SELECT
+ ij.junctions_pan_id, ij.avg_value, stats.multiplier
+ , max(ij.expression_pan_id) OVER w as max_exp_pan_id
+ , max(ij.sample_name) OVER w as max_sample_Name
+ , max(ij.exp_name) OVER w as max_exp_name
+ FROM ij, stats
+ WHERE ij.junctions_pan_id = stats.protocol_app_node_id
+ WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
+ )
+ SELECT DISTINCT * FROM (
+ SELECT junctions_pan_id
+ , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
+ , first_value(max_sample_name) OVER w1 as sample_name
+ , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
+ , multiplier
+ FROM part
+ WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
+ ) t
+ ORDER BY junctions_pan_id
+
+ ;
+
+
+
+ create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
+
+ ;
+
+ drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
+ junctions_pan_id,
+ read_length,
+ mapped_reads,
+ avg_mapping_coverage,
+ num_replicates
+ ) AS
+ SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
+ round(avg(number_mapped_reads),1) as mapped_reads,
+ round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
+ as avg_mapping_coverage,
+ count(*) as num_replicates
+ FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
+ cb.value::NUMERIC as number_mapped_reads,
+ cc.value::NUMERIC as avg_mapping_coverage
+ FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
+ :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
+ WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
+ AND pj.input_pan_id = p.input_pan_id
+ AND p.output_pan_id = pan.protocol_app_node_id
+ AND pan.name like '%tpm - unique%'
+ AND p.output_pan_id = nafe.protocol_app_node_id
+ GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
+ ORDER BY pj.output_pan_id
+ ) , stats AS (
+ SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
+ sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
+ FROM apidb.IntronJunction
+ WHERE unique_reads >= 1
+ GROUP BY protocol_app_node_id
+ ), part AS (
+ SELECT
+ ij.junctions_pan_id, ij.avg_value, stats.multiplier
+ , max(ij.expression_pan_id) OVER w as max_exp_pan_id
+ , max(ij.sample_name) OVER w as max_sample_Name
+ , max(ij.exp_name) OVER w as max_exp_name
+ FROM ij, stats
+ WHERE ij.junctions_pan_id = stats.protocol_app_node_id
+ WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
+ )
+ SELECT DISTINCT * FROM (
+ SELECT junctions_pan_id
+ , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
+ , first_value(max_sample_name) OVER w1 as sample_name
+ , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
+ , multiplier
+ FROM part
+ WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
+ ) t
+ ORDER BY junctions_pan_id
+
+ ;
+
+
+
+ create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
+
+ ;
+
+ drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
+ junctions_pan_id,
+ read_length,
+ mapped_reads,
+ avg_mapping_coverage,
+ num_replicates
+ ) AS
+ SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
+ round(avg(number_mapped_reads),1) as mapped_reads,
+ round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
+ as avg_mapping_coverage,
+ count(*) as num_replicates
+ FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
+ cb.value::NUMERIC as number_mapped_reads,
+ cc.value::NUMERIC as avg_mapping_coverage
+ FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
+ :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
+ WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
+ AND pj.input_pan_id = p.input_pan_id
+ AND p.output_pan_id = pan.protocol_app_node_id
+ AND pan.name like '%tpm - unique%'
+ AND p.output_pan_id = nafe.protocol_app_node_id
+ GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
+ ORDER BY pj.output_pan_id
+ ) , stats AS (
+ SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
+ sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
+ FROM apidb.IntronJunction
+ WHERE unique_reads >= 1
+ GROUP BY protocol_app_node_id
+ ), part AS (
+ SELECT
+ ij.junctions_pan_id, ij.avg_value, stats.multiplier
+ , max(ij.expression_pan_id) OVER w as max_exp_pan_id
+ , max(ij.sample_name) OVER w as max_sample_Name
+ , max(ij.exp_name) OVER w as max_exp_name
+ FROM ij, stats
+ WHERE ij.junctions_pan_id = stats.protocol_app_node_id
+ WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
+ )
+ SELECT DISTINCT * FROM (
+ SELECT junctions_pan_id
+ , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
+ , first_value(max_sample_name) OVER w1 as sample_name
+ , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
+ , multiplier
+ FROM part
+ WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
+ ) t
+ ORDER BY junctions_pan_id
+
+ ;
+
+
+
+ create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
+
+ ;
+
+ drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
+ junctions_pan_id,
+ read_length,
+ mapped_reads,
+ avg_mapping_coverage,
+ num_replicates
+ ) AS
+ SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
+ round(avg(number_mapped_reads),1) as mapped_reads,
+ round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
+ as avg_mapping_coverage,
+ count(*) as num_replicates
+ FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
+ cb.value::NUMERIC as number_mapped_reads,
+ cc.value::NUMERIC as avg_mapping_coverage
+ FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
+ :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
+ WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
+ AND pj.input_pan_id = p.input_pan_id
+ AND p.output_pan_id = pan.protocol_app_node_id
+ AND pan.name like '%tpm - unique%'
+ AND p.output_pan_id = nafe.protocol_app_node_id
+ GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
+ ORDER BY pj.output_pan_id
+ ) , stats AS (
+ SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
+ sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
+ FROM apidb.IntronJunction
+ WHERE unique_reads >= 1
+ GROUP BY protocol_app_node_id
+ ), part AS (
+ SELECT
+ ij.junctions_pan_id, ij.avg_value, stats.multiplier
+ , max(ij.expression_pan_id) OVER w as max_exp_pan_id
+ , max(ij.sample_name) OVER w as max_sample_Name
+ , max(ij.exp_name) OVER w as max_exp_name
+ FROM ij, stats
+ WHERE ij.junctions_pan_id = stats.protocol_app_node_id
+ WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
+ )
+ SELECT DISTINCT * FROM (
+ SELECT junctions_pan_id
+ , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
+ , first_value(max_sample_name) OVER w1 as sample_name
+ , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
+ , multiplier
+ FROM part
+ WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
+ ) t
+ ORDER BY junctions_pan_id
+
+ ;
+
+
+
+ create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
+
+ ;
+
+ drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
+ junctions_pan_id,
+ read_length,
+ mapped_reads,
+ avg_mapping_coverage,
+ num_replicates
+ ) AS
+ SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
+ round(avg(number_mapped_reads),1) as mapped_reads,
+ round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
+ as avg_mapping_coverage,
+ count(*) as num_replicates
+ FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
+ cb.value::NUMERIC as number_mapped_reads,
+ cc.value::NUMERIC as avg_mapping_coverage
+ FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
+ :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
+ WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
+ AND pj.input_pan_id = p.input_pan_id
+ AND p.output_pan_id = pan.protocol_app_node_id
+ AND pan.name like '%tpm - unique%'
+ AND p.output_pan_id = nafe.protocol_app_node_id
+ GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
+ ORDER BY pj.output_pan_id
+ ) , stats AS (
+ SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
+ sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
+ FROM apidb.IntronJunction
+ WHERE unique_reads >= 1
+ GROUP BY protocol_app_node_id
+ ), part AS (
+ SELECT
+ ij.junctions_pan_id, ij.avg_value, stats.multiplier
+ , max(ij.expression_pan_id) OVER w as max_exp_pan_id
+ , max(ij.sample_name) OVER w as max_sample_Name
+ , max(ij.exp_name) OVER w as max_exp_name
+ FROM ij, stats
+ WHERE ij.junctions_pan_id = stats.protocol_app_node_id
+ WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
+ )
+ SELECT DISTINCT * FROM (
+ SELECT junctions_pan_id
+ , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
+ , first_value(max_sample_name) OVER w1 as sample_name
+ , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
+ , multiplier
+ FROM part
+ WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
+ ) t
+ ORDER BY junctions_pan_id
+
+ ;
+
+
+
+ create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
+
+ ;
+
+ drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
+
+ CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
+ junctions_pan_id,
+ read_length,
+ mapped_reads,
+ avg_mapping_coverage,
+ num_replicates
+ ) AS
+ SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
+ round(avg(number_mapped_reads),1) as mapped_reads,
+ round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
+ as avg_mapping_coverage,
+ count(*) as num_replicates
+ FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
+ cb.value::NUMERIC as number_mapped_reads,
+ cc.value::NUMERIC as avg_mapping_coverage
+ FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
+ :SCHEMA.:ORG_ABBREVPANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb,
STUDY.CHARACTERISTIC cc, sres.ontologyterm otc
WHERE je.junctions_pan_id = ioa.output_pan_id
AND ioa.input_pan_id = ca.protocol_app_node_id
From 3235c0c08e86322745b3cc1334f20b5e8db0d3e9 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 17:10:46 -0400
Subject: [PATCH 055/112] debug
---
Model/lib/psql/webready/orgSpecific/GeneId.psql | 4 ++--
Model/lib/psql/webready/orgSpecific/ProteinSequence.psql | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GeneId.psql b/Model/lib/psql/webready/orgSpecific/GeneId.psql
index 75b1bfdab5..315917f872 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneId.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneId.psql
@@ -13,7 +13,7 @@ create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureTmp as
)
;
-create index :SCHEMA.:ORG_ABBREVGeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id)
+create index GeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id)
;
@@ -261,7 +261,7 @@ create index :SCHEMA.:ORG_ABBREVGeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_AB
- CREATE UNIQUE INDEX :ORG_ABBREV_gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id)
+ CREATE UNIQUE INDEX gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id)
;
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql b/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql
index 68a8664412..0d2f582182 100644
--- a/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql
+++ b/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql
@@ -1,7 +1,7 @@
:CREATE_AND_POPULATE
WITH pAttr AS (
SELECT distinct source_id, aa_sequence_id
- FROM ProteinAttributes where org_abbrev = ':ORG_ABBREV')
+ FROM :SCHEMA.ProteinAttributes where org_abbrev = ':ORG_ABBREV')
SELECT pa.source_id, tas.sequence,
':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM pAttr pa, dots.TranslatedAaSequence tas
From 1b3de8eb4adeacebeef3361328e9258e404ad13b Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 17:20:00 -0400
Subject: [PATCH 056/112] debug
---
.../webready/orgSpecific/NameMappingGIJ.psql | 306 +-----------------
1 file changed, 7 insertions(+), 299 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
index 8ac689abd8..1dbca91c94 100644
--- a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
+++ b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
@@ -4,12 +4,14 @@
WITH ij AS (
SELECT pj.output_pan_id as junctions_pan_id, p.output_pan_id as expression_pan_id, avg(nafe.value) as avg_value,pan.name as exp_name,
regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name
- FROM :SCHEMA.:ORG_ABBREVpanio p, :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
+ FROM :SCHEMA.panio p, :SCHEMA.panio pj, results.nafeatureexpression nafe, study.protocolappnode pan
WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
AND pj.input_pan_id = p.input_pan_id
AND p.output_pan_id = pan.protocol_app_node_id
AND pan.name like '%tpm - unique%'
AND p.output_pan_id = nafe.protocol_app_node_id
+ and p.org_abbrev = ':ORG_ABBREV'
+ and pj.org_abbrev = ':ORG_ABBREV'
GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
ORDER BY pj.output_pan_id
) , stats AS (
@@ -43,7 +45,7 @@
- create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
+ create index junexpgijtmp_ix on :SCHEMA.:ORG_ABBREVJunExpGIJtmp(junctions_pan_id,exp_pan_id)
;
@@ -64,13 +66,14 @@
FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
cb.value::NUMERIC as number_mapped_reads,
cc.value::NUMERIC as avg_mapping_coverage
- FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
- :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
+ FROM :SCHEMA.:ORG_ABBREVjunexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
+ panio pj, results.nafeatureexpression nafe, study.protocolappnode pan
WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
AND pj.input_pan_id = p.input_pan_id
AND p.output_pan_id = pan.protocol_app_node_id
AND pan.name like '%tpm - unique%'
AND p.output_pan_id = nafe.protocol_app_node_id
+ and pj.org_abbrev = ':ORG_ABBREV'
GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
ORDER BY pj.output_pan_id
) , stats AS (
@@ -103,301 +106,6 @@
;
-
- create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
-
- ;
-
- drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
-
- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
- junctions_pan_id,
- read_length,
- mapped_reads,
- avg_mapping_coverage,
- num_replicates
- ) AS
- SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
- round(avg(number_mapped_reads),1) as mapped_reads,
- round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
- as avg_mapping_coverage,
- count(*) as num_replicates
- FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
- cb.value::NUMERIC as number_mapped_reads,
- cc.value::NUMERIC as avg_mapping_coverage
- FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
- :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
- WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
- AND pj.input_pan_id = p.input_pan_id
- AND p.output_pan_id = pan.protocol_app_node_id
- AND pan.name like '%tpm - unique%'
- AND p.output_pan_id = nafe.protocol_app_node_id
- GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
- ORDER BY pj.output_pan_id
- ) , stats AS (
- SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
- sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
- FROM apidb.IntronJunction
- WHERE unique_reads >= 1
- GROUP BY protocol_app_node_id
- ), part AS (
- SELECT
- ij.junctions_pan_id, ij.avg_value, stats.multiplier
- , max(ij.expression_pan_id) OVER w as max_exp_pan_id
- , max(ij.sample_name) OVER w as max_sample_Name
- , max(ij.exp_name) OVER w as max_exp_name
- FROM ij, stats
- WHERE ij.junctions_pan_id = stats.protocol_app_node_id
- WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
- )
- SELECT DISTINCT * FROM (
- SELECT junctions_pan_id
- , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
- , first_value(max_sample_name) OVER w1 as sample_name
- , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
- , multiplier
- FROM part
- WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
- ) t
- ORDER BY junctions_pan_id
-
- ;
-
-
-
- create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
-
- ;
-
- drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
-
- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
- junctions_pan_id,
- read_length,
- mapped_reads,
- avg_mapping_coverage,
- num_replicates
- ) AS
- SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
- round(avg(number_mapped_reads),1) as mapped_reads,
- round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
- as avg_mapping_coverage,
- count(*) as num_replicates
- FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
- cb.value::NUMERIC as number_mapped_reads,
- cc.value::NUMERIC as avg_mapping_coverage
- FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
- :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
- WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
- AND pj.input_pan_id = p.input_pan_id
- AND p.output_pan_id = pan.protocol_app_node_id
- AND pan.name like '%tpm - unique%'
- AND p.output_pan_id = nafe.protocol_app_node_id
- GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
- ORDER BY pj.output_pan_id
- ) , stats AS (
- SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
- sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
- FROM apidb.IntronJunction
- WHERE unique_reads >= 1
- GROUP BY protocol_app_node_id
- ), part AS (
- SELECT
- ij.junctions_pan_id, ij.avg_value, stats.multiplier
- , max(ij.expression_pan_id) OVER w as max_exp_pan_id
- , max(ij.sample_name) OVER w as max_sample_Name
- , max(ij.exp_name) OVER w as max_exp_name
- FROM ij, stats
- WHERE ij.junctions_pan_id = stats.protocol_app_node_id
- WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
- )
- SELECT DISTINCT * FROM (
- SELECT junctions_pan_id
- , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
- , first_value(max_sample_name) OVER w1 as sample_name
- , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
- , multiplier
- FROM part
- WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
- ) t
- ORDER BY junctions_pan_id
-
- ;
-
-
-
- create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
-
- ;
-
- drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
-
- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
- junctions_pan_id,
- read_length,
- mapped_reads,
- avg_mapping_coverage,
- num_replicates
- ) AS
- SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
- round(avg(number_mapped_reads),1) as mapped_reads,
- round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
- as avg_mapping_coverage,
- count(*) as num_replicates
- FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
- cb.value::NUMERIC as number_mapped_reads,
- cc.value::NUMERIC as avg_mapping_coverage
- FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
- :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
- WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
- AND pj.input_pan_id = p.input_pan_id
- AND p.output_pan_id = pan.protocol_app_node_id
- AND pan.name like '%tpm - unique%'
- AND p.output_pan_id = nafe.protocol_app_node_id
- GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
- ORDER BY pj.output_pan_id
- ) , stats AS (
- SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
- sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
- FROM apidb.IntronJunction
- WHERE unique_reads >= 1
- GROUP BY protocol_app_node_id
- ), part AS (
- SELECT
- ij.junctions_pan_id, ij.avg_value, stats.multiplier
- , max(ij.expression_pan_id) OVER w as max_exp_pan_id
- , max(ij.sample_name) OVER w as max_sample_Name
- , max(ij.exp_name) OVER w as max_exp_name
- FROM ij, stats
- WHERE ij.junctions_pan_id = stats.protocol_app_node_id
- WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
- )
- SELECT DISTINCT * FROM (
- SELECT junctions_pan_id
- , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
- , first_value(max_sample_name) OVER w1 as sample_name
- , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
- , multiplier
- FROM part
- WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
- ) t
- ORDER BY junctions_pan_id
-
- ;
-
-
-
- create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
-
- ;
-
- drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
-
- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
- junctions_pan_id,
- read_length,
- mapped_reads,
- avg_mapping_coverage,
- num_replicates
- ) AS
- SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
- round(avg(number_mapped_reads),1) as mapped_reads,
- round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
- as avg_mapping_coverage,
- count(*) as num_replicates
- FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
- cb.value::NUMERIC as number_mapped_reads,
- cc.value::NUMERIC as avg_mapping_coverage
- FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
- :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan
- WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
- AND pj.input_pan_id = p.input_pan_id
- AND p.output_pan_id = pan.protocol_app_node_id
- AND pan.name like '%tpm - unique%'
- AND p.output_pan_id = nafe.protocol_app_node_id
- GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
- ORDER BY pj.output_pan_id
- ) , stats AS (
- SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
- sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
- FROM apidb.IntronJunction
- WHERE unique_reads >= 1
- GROUP BY protocol_app_node_id
- ), part AS (
- SELECT
- ij.junctions_pan_id, ij.avg_value, stats.multiplier
- , max(ij.expression_pan_id) OVER w as max_exp_pan_id
- , max(ij.sample_name) OVER w as max_sample_Name
- , max(ij.exp_name) OVER w as max_exp_name
- FROM ij, stats
- WHERE ij.junctions_pan_id = stats.protocol_app_node_id
- WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
- )
- SELECT DISTINCT * FROM (
- SELECT junctions_pan_id
- , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
- , first_value(max_sample_name) OVER w1 as sample_name
- , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
- , multiplier
- FROM part
- WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
- ) t
- ORDER BY junctions_pan_id
-
- ;
-
-
-
- create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
-
- ;
-
- drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
-
- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
- junctions_pan_id,
- read_length,
- mapped_reads,
- avg_mapping_coverage,
- num_replicates
- ) AS
- SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
- round(avg(number_mapped_reads),1) as mapped_reads,
- round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
- as avg_mapping_coverage,
- count(*) as num_replicates
- FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
- cb.value::NUMERIC as number_mapped_reads,
- cc.value::NUMERIC as avg_mapping_coverage
- FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
- :SCHEMA.:ORG_ABBREVPANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb,
- STUDY.CHARACTERISTIC cc, sres.ontologyterm otc
- WHERE je.junctions_pan_id = ioa.output_pan_id
- AND ioa.input_pan_id = ca.protocol_app_node_id
- AND ca.value is not null
- AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID
- AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length'
- AND ca.protocol_app_node_id = cb.protocol_app_node_id
- AND cb.value is not null
- AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID
- AND otb.source_id IN ('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads'
- AND ca.protocol_app_node_id = cc.protocol_app_node_id
- AND cc.value is not null
- AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID
- AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage'
- ) t
- GROUP by Junctions_Pan_Id
-
- ;
-
-
-
- CREATE INDEX mpstats_pk_ix on MappingStatsGIJtmp
- (junctions_pan_id,read_length,mapped_reads,avg_mapping_coverage,num_replicates)
-
-
- ;
-
:CREATE_AND_POPULATE
From 130a80536e82e00385fb5b3ae9699e25eb896a31 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 17:34:32 -0400
Subject: [PATCH 057/112] debug
---
Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql | 2 +-
Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql
index 8dad2f7481..c91f81d84d 100644
--- a/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql
@@ -1,3 +1,3 @@
- create index ProtSeq_ix on :SCHEMA._ORG_ABBREVProteinSequence (source_id, project_id)
+ create index ProtSeq_ix on :SCHEMA.:ORG_ABBREVProteinSequence (source_id, project_id)
;
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
index cc70739339..70a738e12d 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
@@ -6,7 +6,7 @@
substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal
FROM (SELECT DISTINCT t.na_feature_id, dr.primary_identifier as uniprot_id
FROM sres.DbRef dr, dots.DbRefNaFeature x, dots.Transcript t,
- dots.genefeature gf, dots.nasequence nas
+ dots.genefeature gf, dots.nasequence nas,
sres.ExternalDatabase d, sres.ExternalDatabaseRelease r
WHERE dr.db_ref_id = x.DB_REF_ID
AND t.parent_id = gf.na_feature_id
From 8a2e733ae48581b1a7ae2eae14684bba3df31bd6 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 17:39:39 -0400
Subject: [PATCH 058/112] debug
---
.../webready/orgSpecific/NameMappingGIJ.psql | 53 +++++++------------
1 file changed, 18 insertions(+), 35 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
index 1dbca91c94..f4ff44a447 100644
--- a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
+++ b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
@@ -67,41 +67,24 @@
cb.value::NUMERIC as number_mapped_reads,
cc.value::NUMERIC as avg_mapping_coverage
FROM :SCHEMA.:ORG_ABBREVjunexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
- panio pj, results.nafeatureexpression nafe, study.protocolappnode pan
- WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
- AND pj.input_pan_id = p.input_pan_id
- AND p.output_pan_id = pan.protocol_app_node_id
- AND pan.name like '%tpm - unique%'
- AND p.output_pan_id = nafe.protocol_app_node_id
- and pj.org_abbrev = ':ORG_ABBREV'
- GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
- ORDER BY pj.output_pan_id
- ) , stats AS (
- SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
- sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
- FROM apidb.IntronJunction
- WHERE unique_reads >= 1
- GROUP BY protocol_app_node_id
- ), part AS (
- SELECT
- ij.junctions_pan_id, ij.avg_value, stats.multiplier
- , max(ij.expression_pan_id) OVER w as max_exp_pan_id
- , max(ij.sample_name) OVER w as max_sample_Name
- , max(ij.exp_name) OVER w as max_exp_name
- FROM ij, stats
- WHERE ij.junctions_pan_id = stats.protocol_app_node_id
- WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
- )
- SELECT DISTINCT * FROM (
- SELECT junctions_pan_id
- , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
- , first_value(max_sample_name) OVER w1 as sample_name
- , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
- , multiplier
- FROM part
- WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
- ) t
- ORDER BY junctions_pan_id
+ :SCHEMA.PANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb,
+ STUDY.CHARACTERISTIC cc, sres.ontologyterm otc
+ WHERE je.junctions_pan_id = ioa.output_pan_id
+ AND ioa.input_pan_id = ca.protocol_app_node_id
+ AND ca.value is not null
+ and ioa.org_abbrev = ':ORG_ABBREV'
+ AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID
+ AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length'
+ AND ca.protocol_app_node_id = cb.protocol_app_node_id
+ AND cb.value is not null
+ AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID
+ AND otb.source_id IN ('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads'
+ AND ca.protocol_app_node_id = cc.protocol_app_node_id
+ AND cc.value is not null
+ AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID
+ AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage'
+ ) t
+ GROUP by Junctions_Pan_Id
;
From fc1c4701216faa461e7fe8096b7e71ef18fb8396 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 17:50:42 -0400
Subject: [PATCH 059/112] debug
---
Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql | 5 +----
.../lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 +-
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
index f4ff44a447..d21a1b1cf4 100644
--- a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
+++ b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
@@ -90,9 +90,6 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE NameMappingGIJ AS
SELECT DISTINCT edp.dataset_presenter_display_name as exp_name,
edp.external_database_name, je.sample_name,
je.junctions_pan_id, je.exp_pan_id,
@@ -101,7 +98,7 @@
substr(uj.value, 1, 4000) as include_unified_junctions,
ms.read_length, ms.mapped_reads, ms.avg_mapping_coverage, ms.num_replicates,
je.switch_strands, je.multiplier
- FROM junexpgijtmp je, study.nodeNodeSet sl, study.NodeSet s, ExternalDbDatasetPresenter edp,
+ FROM :SCHEMA.:ORG_ABBREVjunexpgijtmp je, study.nodeNodeSet sl, study.NodeSet s, ExternalDbDatasetPresenter edp,
DatasetProperty dp, DatasetProperty sj, DatasetProperty uj, mappingstatsgijtmp ms
WHERE sl.protocol_app_node_id = je.junctions_pan_id
AND je.junctions_pan_id = ms.junctions_pan_id
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
index 70a738e12d..ebfb21590a 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
@@ -76,7 +76,7 @@
gsa.sequence_type,
gsa.chromosome_order_num, gsa.na_sequence_id
FROM dots.GeneFeature gf
- INNER JOIN dots.nasequence seq ON seq.na_sequence_id = gf.na_sequence_id and nas.taxon_id = :TAXON_ID
+ INNER JOIN dots.nasequence seq ON seq.na_sequence_id = gf.na_sequence_id and seq.taxon_id = :TAXON_ID
INNER JOIN apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id
INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id
INNER JOIN :SCHEMA.GeneLocations gloc ON gf.source_id = gloc.source_id and gloc.org_abbrev = ':ORG_ABBREV'
From 26020b15a0764bd8b9e9ea19e4c97eceec066498 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 17:59:54 -0400
Subject: [PATCH 060/112] debug
---
Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 --
1 file changed, 2 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
index ebfb21590a..73c8db86ef 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
@@ -37,7 +37,6 @@
COALESCE(preferred_name.name, any_name.name) AS gene_name,
cast(coalesce(preferred_gene_product.product, any_gene_product.product, gf.product)
as VARCHAR(300)) as old_gene_product,
- COALESCE(gp.product, 'unspecified product') as gene_product,
REPLACE(so.name, '_', ' ') AS gene_type,
gf.name as gene_ebi_biotype,
gi.gene_id,
@@ -80,7 +79,6 @@
INNER JOIN apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id
INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id
INNER JOIN :SCHEMA.GeneLocations gloc ON gf.source_id = gloc.source_id and gloc.org_abbrev = ':ORG_ABBREV'
- LEFT JOIN :SCHEMA.GeneProduct gp ON gf.source_id = gp.source_id and gp.org_abbrev = ':ORG_ABBREV'
INNER JOIN sres.ExternalDatabaseRelease edr ON gf.external_database_release_id = edr.external_database_release_id
INNER JOIN sres.ExternalDatabase ed ON edr.external_database_id = ed.external_database_id
INNER JOIN :SCHEMA.GenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id and gsa.org_abbrev = ':ORG_ABBREV'
From 67bbd35761a90bc5b444d8172f04911b1ac24abf Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 18:01:18 -0400
Subject: [PATCH 061/112] debug
---
.../psql/webready/{orgSpecific => unknown}/NameMappingGIJ.psql | 0
.../psql/webready/{orgSpecific => unknown}/NameMappingGIJ_ix.psql | 0
.../webready/{orgSpecific => unknown}/SequenceEnzymeClass.psql | 0
.../webready/{orgSpecific => unknown}/SequenceEnzymeClass_ix.psql | 0
4 files changed, 0 insertions(+), 0 deletions(-)
rename Model/lib/psql/webready/{orgSpecific => unknown}/NameMappingGIJ.psql (100%)
rename Model/lib/psql/webready/{orgSpecific => unknown}/NameMappingGIJ_ix.psql (100%)
rename Model/lib/psql/webready/{orgSpecific => unknown}/SequenceEnzymeClass.psql (100%)
rename Model/lib/psql/webready/{orgSpecific => unknown}/SequenceEnzymeClass_ix.psql (100%)
diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql b/Model/lib/psql/webready/unknown/NameMappingGIJ.psql
similarity index 100%
rename from Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql
rename to Model/lib/psql/webready/unknown/NameMappingGIJ.psql
diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql b/Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql
similarity index 100%
rename from Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql
rename to Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql
diff --git a/Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass.psql b/Model/lib/psql/webready/unknown/SequenceEnzymeClass.psql
similarity index 100%
rename from Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass.psql
rename to Model/lib/psql/webready/unknown/SequenceEnzymeClass.psql
diff --git a/Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass_ix.psql b/Model/lib/psql/webready/unknown/SequenceEnzymeClass_ix.psql
similarity index 100%
rename from Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass_ix.psql
rename to Model/lib/psql/webready/unknown/SequenceEnzymeClass_ix.psql
From 23393e593f72e0d4b632f7bb2c170decf0ae82be Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 22 May 2025 18:04:42 -0400
Subject: [PATCH 062/112] add comments
---
Model/lib/xml/tuningManager/webtables.org | 2 ++
1 file changed, 2 insertions(+)
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index 0c80030b59..04e3ae5720 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -38,6 +38,7 @@
- [X] ProteinAttributes.psql
- [ ] TranscriptAttributes_ix.psql
- [ ] TranscriptAttributes.psql
+ - no longer has products column because that is done by TM
- [X] CodingSequence_ix.psql
- [X] CodingSequence.psql
- [X] IntronUtrCoords_ix.psql
@@ -132,6 +133,7 @@
- [ ] GeneIntronJunction.psql
- [ ] NameMappingGIJ_ix.psql
- [ ] NameMappingGIJ.psql
+ - needs to be in TM, depends on dataset presenters
- [ ] GeneMaxIntronGIJ_ix.psql
- should be aux table and dropped
- [ ] GeneMaxIntronGIJ.psql
From 3372f5fdc2026ec85c057c3cca2a87cbb81e6478 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 10:10:49 -0400
Subject: [PATCH 063/112] debug
---
Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
index 73c8db86ef..60d85bd95a 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
@@ -293,7 +293,7 @@
end
) as VARCHAR(300)
) as old_gene_product,
- genefeat.gene_product,
+-- genefeat.gene_product,
genefeat.gene_type,
genefeat.gene_ebi_biotype,
genefeat.gene_id,
From f17c412b51730a65810658909148af92fea4b495 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 10:41:02 -0400
Subject: [PATCH 064/112] debug
---
Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
index 60d85bd95a..48093a587b 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
@@ -364,7 +364,8 @@
pa.molecular_weight,
pa.isoelectric_point,
pa.signalp_peptide,
- pa.ec_numbers, pa.ec_numbers_derived,
+ pa.ec_numbers,
+ --pa.ec_numbers_derived,
pa.annotated_go_component,
pa.annotated_go_function,
pa.annotated_go_process,
From 604dcc5161f61165771d2dc62a9c6d1f3519ff3b Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 10:47:14 -0400
Subject: [PATCH 065/112] debug
---
Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
index 48093a587b..abe0104ceb 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
@@ -443,7 +443,7 @@
;
- UPDATE :ORG_ABBREVTranscriptAttributes
+ UPDATE :SCHEMA.TranscriptAttributes
SET representative_transcript = source_id
WHERE representative_transcript is null
and org_abbrev = ':ORG_ABBREV'
From 6431a17ab73069578221a75054c6ad354437393c Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 11:04:41 -0400
Subject: [PATCH 066/112] debug
---
Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql | 2 +-
.../psql/webready/orgSpecific/EstAlignmentGeneSummary.psql | 2 +-
Model/lib/psql/webready/orgSpecific/GeneAttributes.psql | 4 +++-
Model/lib/psql/webready/orgSpecific/GeneModelDump.psql | 2 +-
Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql | 1 -
Model/lib/xml/tuningManager/webtables.org | 1 +
6 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql
index 4e1996000d..843400b3e9 100644
--- a/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql
+++ b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql
@@ -1,6 +1,6 @@
:CREATE_AND_POPULATE
SELECT DISTINCT ta.source_id, ta.gene_source_id, ta.project_id, sr.protocol_app_node_id,
- ta.project_id, ta.org_abbrev, current_timestamp as modification_date
+ ta.project_id, ta.org_abbrev, current_timestamp as modification_date,
CASE
WHEN ta.is_reversed = 0
THEN round(abs(ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)),0)
diff --git a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql
index dc2936487b..c4546c4fea 100644
--- a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql
+++ b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql
@@ -50,7 +50,7 @@
/* (because they overlap no genes) */
SELECT ba.blat_alignment_id
FROM dots.BlatAlignment ba, dots.NaSequence query_sequence,
- sres.OntologyTerm so, dots.NaSequence target_sequence,
+ sres.OntologyTerm so, dots.NaSequence target_sequence
WHERE query_sequence.na_sequence_id = ba.query_na_sequence_id
AND query_sequence.sequence_ontology_id = so.ontology_term_id
AND ba.target_na_sequence_id = target_sequence.na_sequence_id
diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
index 490b20f223..61fe20efd0 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
@@ -60,7 +60,9 @@
, ta.gene_locations as locations
FROM :SCHEMA.TranscriptAttributes ta
INNER JOIN (
- SELECT gene_source_id, MIN(is_pseudo) AS is_pseudo, MIN(gene_product) AS product,
+ SELECT gene_source_id, MIN(is_pseudo) AS is_pseudo,
+ --MIN(gene_product) AS product,
+ 'FIX ME' AS product,
substr(STRING_AGG(transcript_product, ',' order by transcript_product), 1, 240) as transcript_product
FROM :SCHEMA.TranscriptAttributes
WHERE org_abbrev = ':ORG_ABBREV'
diff --git a/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql b/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql
index 2ee1bcdf1e..79eab5a4d7 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql
@@ -8,7 +8,7 @@
gm.type, gl.is_reversed
FROM
apidb.FeatureLocation gl, dots.NaSequence s,
- SCHEMA.TranscriptAttributes ta,
+ :SCHEMA.TranscriptAttributes ta,
(
SELECT CASE el.feature_type WHEN 'ExonFeature' THEN 'Exon' ELSE el.feature_type END as type,
el.parent_id as na_feature_id, el.start_min as start_min, el.end_max as end_max
diff --git a/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql
index d8915ea386..0a6b669d8f 100644
--- a/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql
+++ b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql
@@ -4,7 +4,6 @@
, current_timestamp as modification_date
, ta.source_id as transcript_source_id
, ta.gene_source_id AS gene_source_id
- , ta.project_id
, tmf.topology AS tmf_topology
, aal.start_min AS tmf_start_min
, aal.end_max AS tmf_end_max
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index 04e3ae5720..d8eee266c7 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -58,6 +58,7 @@
- [X] GeneId.psql
- [X] GeneAttributes_ix.psql
- [X] GeneAttributes.psql
+ - fix gene_product
- [X] GeneCopyNumbers_ix.psql
- [X] GeneCopyNumbers.psql
- [X] GeneGoTable_ix.psql
From ede9ee692be4a865bd48a52ea008a5080daf297e Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 11:31:10 -0400
Subject: [PATCH 067/112] debug
---
Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql | 2 +-
Model/lib/psql/webready/orgSpecific/GeneAttributes.psql | 5 +++--
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql
index 843400b3e9..a29541ffe3 100644
--- a/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql
+++ b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql
@@ -1,5 +1,5 @@
:CREATE_AND_POPULATE
- SELECT DISTINCT ta.source_id, ta.gene_source_id, ta.project_id, sr.protocol_app_node_id,
+ SELECT DISTINCT ta.source_id, ta.gene_source_id, sr.protocol_app_node_id,
ta.project_id, ta.org_abbrev, current_timestamp as modification_date,
CASE
WHEN ta.is_reversed = 0
diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
index 61fe20efd0..8ad8a6f0d0 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
@@ -12,7 +12,8 @@
, sequence_id
, gene_name AS name
, COALESCE(aggregates.product, aggregates.transcript_product) as old_product
- , COALESCE(gp.product, 'unspecified product') as product
+ -- , COALESCE(gp.product, 'unspecified product') as product
+ , 'FIX ME' as product
, gene_type
, gene_ebi_biotype
, gene_id
@@ -88,7 +89,7 @@
) t
GROUP BY na_feature_id
) uniprot ON ta.gene_na_feature_id = uniprot.na_feature_id
- LEFT JOIN :ORG_ABBREVGeneProduct gp ON ta.gene_source_id = gp.source_id
+-- LEFT JOIN :ORG_ABBREVGeneProduct gp ON ta.gene_source_id = gp.source_id
WHERE ta.org_abbrev = ':ORG_ABBREV'
ORDER BY ta.gene_source_id
:DECLARE_PARTITION;
From d9acbfeaa675f0e6308d6d65062e94974f9233e8 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 11:39:59 -0400
Subject: [PATCH 068/112] debug
---
.../psql/webready/orgSpecific/GeneAttributes.psql | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
index 8ad8a6f0d0..de455ff94a 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
@@ -44,12 +44,12 @@
, gene_context_start as context_start
, gene_context_end as context_end
, orthomcl_name
- , gene_total_hts_snps as total_hts_snps
- , gene_hts_nonsynonymous_snps as hts_nonsynonymous_snps
- , gene_hts_stop_codon_snps as hts_stop_codon_snps
- , gene_hts_noncoding_snps as hts_noncoding_snps
- , gene_hts_synonymous_snps as hts_synonymous_snps
- , gene_hts_nonsyn_syn_ratio as hts_nonsyn_syn_ratio
+ -- , gene_total_hts_snps as total_hts_snps
+ -- , gene_hts_nonsynonymous_snps as hts_nonsynonymous_snps
+ -- , gene_hts_stop_codon_snps as hts_stop_codon_snps
+ -- , gene_hts_noncoding_snps as hts_noncoding_snps
+ -- , gene_hts_synonymous_snps as hts_synonymous_snps
+ -- , gene_hts_nonsyn_syn_ratio as hts_nonsyn_syn_ratio
, comment_string
, uniprot.uniprot_id
, uniprot.uniprot_id_internal
From d10d7ef1dce8665498ea17ab7458ccba8966f5d1 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 11:44:18 -0400
Subject: [PATCH 069/112] debug
---
Model/lib/psql/webready/orgSpecific/GeneAttributes.psql | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
index de455ff94a..150593e12d 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
@@ -109,12 +109,12 @@
UPDATE :SCHEMA.GeneAttributes ga
SET strain_count = (
SELECT strain_count
- FROM :SCHEMA.:ORG_ABBREVSpeciesInfo si
+ FROM :SCHEMA.:ORG_ABBREVSpeciesInfoTmp si
WHERE si.genus_species = ga.genus_species
)
WHERE org_abbrev = ':ORG_ABBREV'
;
-drop table :SCHEMA.:ORG_ABBREVSpeciesInfo
+drop table :SCHEMA.:ORG_ABBREVSpeciesInfoTmp
;
From 0843462064a7861708e5cf1844e044259861a795 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 11:51:30 -0400
Subject: [PATCH 070/112] debug
---
Model/lib/psql/webready/orgSpecific/GeneAttributes.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
index 150593e12d..9f8295b7cc 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
@@ -94,7 +94,7 @@
ORDER BY ta.gene_source_id
:DECLARE_PARTITION;
-
+ drop table if exists :SCHEMA.:ORG_ABBREVSpeciesInfoTmp;
CREATE unlogged TABLE :SCHEMA.:ORG_ABBREVSpeciesInfoTmp as
SELECT genus_species, count(distinct organism) as strain_count
From 1ab9d38d58634824ba31b872b8c614411c8f3ef7 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 12:03:32 -0400
Subject: [PATCH 071/112] debug
---
Model/lib/psql/webready/orgSpecific/TFBSGene.psql | 2 +-
Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql | 7 +++++--
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
index 882cf31ab9..db5a378683 100644
--- a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
+++ b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
@@ -2,7 +2,7 @@
SELECT DISTINCT
':PROJECT_ID' as project_id,
':ORG_ABBREV' as org_abbrev,
- current_timestamp as modification_date
+ current_timestamp as modification_date,
ga.source_id as gene_source_id,
ga.organism as organism,
ga.genus_species as species,
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
index 1490621a67..c585c753db 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
@@ -40,9 +40,12 @@
AND (tec.ec_number_3 = pec.ec_number_3 or tec.ec_number_3 is null or pec.ec_number_3 is null)
AND (tec.ec_number_4 = pec.ec_number_4 or tec.ec_number_4 is null or pec.ec_number_4 is null)
)
- SELECT DISTINCT ga.source_id
+ SELECT DISTINCT
+ ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , CURRENT_TIMESTAMP as modification_date
+ , ga.source_id
, ga.gene_source_id
- , ga.project_id
, pa.source_id as pathway_source_id
, pa.name as pathway_name
, ec_match.ec_number_transcript as ec_number_gene
From 7f533fee902bd1bf5553fe7ff39179abc1949d09 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 12:23:05 -0400
Subject: [PATCH 072/112] debug
---
Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql | 7 +++----
.../lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 8 +++++---
Model/lib/psql/webready/orgSpecific/TFBSGene.psql | 4 ++--
3 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
index f12202677e..cc66b15a5c 100644
--- a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
+++ b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
@@ -1,13 +1,12 @@
:CREATE_AND_POPULATE
-
- CREATE TABLE PathwayNodeGene as
SELECT DISTINCT pn.pathway_node_id
- , tp.gene_source_id
- FROM transcriptpathway tp
+ , tp.gene_source_id, tp.project_id, tp.org_abbrev, current_timestamp as modification_date,
+ FROM :SCHEMA.transcriptpathway tp
, sres.pathwaynode pn
WHERE tp.pathway_id = pn.pathway_id
AND tp.ec_number_gene like replace(pn.display_label, '-', '%')
+ and tp.org_abbrev = ':ORG_ABBREV'
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
index e792016369..03e8516845 100644
--- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
+++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
@@ -2,9 +2,10 @@
CREATE TABLE PathwaysGeneTable AS
- SELECT * FROM (
+ SELECT t2.*, current_timestamp as modification_date FROM (
SELECT DISTINCT gene_source_id
, project_id
+ , tp.org_abbrev
, pathway_source_id
, pathway_name
, count(reaction_source_id) as reactions
@@ -22,7 +23,7 @@
, pr.expasy_url
, tp.pathway_source
, CASE max(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match
- FROM TranscriptPathway tp
+ FROM :SCHEMA.TranscriptPathway tp
, PathwayAttributes pa
, PathwayCompounds pc
, PathwayReactions pr
@@ -33,7 +34,8 @@
AND tp.ec_number_pathway = pr.enzyme
AND tp.wildcard_count_gene <= tp.wildcard_count_pathway
AND pr.enzyme != '-.-.-.-'
- GROUP BY tp.gene_source_id, tp.project_id, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
+ AND tp.org_abbrev = ':ORG_ABBREV'
+ GROUP BY tp.gene_source_id, tp.project_id, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
) t
GROUP BY gene_source_id, project_id, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match
) t2
diff --git a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
index db5a378683..88a75f3f2b 100644
--- a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
+++ b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
@@ -1,8 +1,8 @@
:CREATE_AND_POPULATE
+ select t,*, current_timestamp as modification_date from (
SELECT DISTINCT
':PROJECT_ID' as project_id,
':ORG_ABBREV' as org_abbrev,
- current_timestamp as modification_date,
ga.source_id as gene_source_id,
ga.organism as organism,
ga.genus_species as species,
@@ -41,7 +41,7 @@
AND ga.org_abbrev = ':ORG_ABBREV'
AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000)
or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) )
-
+ ) t
:DECLARE_PARTITION;
From 4c14c17d87d7910155e1b24a03149537695e3bdd Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 12:45:08 -0400
Subject: [PATCH 073/112] debug
---
Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql | 2 +-
Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 3 ---
Model/lib/psql/webready/orgSpecific/TFBSGene.psql | 7 +++----
3 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
index cc66b15a5c..e0d672aa7c 100644
--- a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
+++ b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
@@ -1,7 +1,7 @@
:CREATE_AND_POPULATE
SELECT DISTINCT pn.pathway_node_id
- , tp.gene_source_id, tp.project_id, tp.org_abbrev, current_timestamp as modification_date,
+ , tp.gene_source_id, tp.project_id, tp.org_abbrev, current_timestamp as modification_date
FROM :SCHEMA.transcriptpathway tp
, sres.pathwaynode pn
WHERE tp.pathway_id = pn.pathway_id
diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
index 03e8516845..4730dcaf00 100644
--- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
+++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
@@ -1,7 +1,4 @@
:CREATE_AND_POPULATE
-
-
- CREATE TABLE PathwaysGeneTable AS
SELECT t2.*, current_timestamp as modification_date FROM (
SELECT DISTINCT gene_source_id
, project_id
diff --git a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
index 88a75f3f2b..52fa02bb4d 100644
--- a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
+++ b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql
@@ -1,8 +1,8 @@
:CREATE_AND_POPULATE
- select t,*, current_timestamp as modification_date from (
SELECT DISTINCT
':PROJECT_ID' as project_id,
':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date,
ga.source_id as gene_source_id,
ga.organism as organism,
ga.genus_species as species,
@@ -31,8 +31,8 @@
THEN '-'
ELSE '+'
END
- END as direction,
- aef.*
+ END as direction
+ -- , aef.*
FROM dots.BindingSiteFeature aef,
apidb.FeatureLocation arrloc,
:SCHEMA.GeneAttributes ga
@@ -41,7 +41,6 @@
AND ga.org_abbrev = ':ORG_ABBREV'
AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000)
or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) )
- ) t
:DECLARE_PARTITION;
From 5a254fe2cd32c595b74f4763ed56a17cbcc39845 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 13:03:13 -0400
Subject: [PATCH 074/112] debug
---
Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 6 +++---
Model/lib/xml/tuningManager/webtables.org | 1 +
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
index 4730dcaf00..2dc24298d4 100644
--- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
+++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
@@ -21,9 +21,9 @@
, tp.pathway_source
, CASE max(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match
FROM :SCHEMA.TranscriptPathway tp
- , PathwayAttributes pa
- , PathwayCompounds pc
- , PathwayReactions pr
+ , :SCHEMA.PathwayAttributes pa
+ , :SCHEMA.PathwayCompounds pc
+ , :SCHEMA.PathwayReactions pr
WHERE tp.pathway_id = pa.pathway_id
AND pc.pathway_id = pa.pathway_id
AND pr.reaction_id = pc.reaction_id
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index d8eee266c7..d622483647 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -73,6 +73,7 @@
- [X] GeneSummaryFilter.psql
- [X] TFBSGene_ix.psql
- [X] TFBSGene.psql
+ - removed aef.*
- [ ] PathwayNodeGene_ix.psql
- This may need to move to comparative genomics because we need the OrthoDerived EC mappings
- [ ] PathwayNodeGene.psql
From 8480a7004f8bf1a6090aedba31fe2d153ae2035c Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 13:11:28 -0400
Subject: [PATCH 075/112] debug
---
Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
index 2dc24298d4..213386f1b0 100644
--- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
+++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
@@ -2,7 +2,7 @@
SELECT t2.*, current_timestamp as modification_date FROM (
SELECT DISTINCT gene_source_id
, project_id
- , tp.org_abbrev
+ , org_abbrev
, pathway_source_id
, pathway_name
, count(reaction_source_id) as reactions
@@ -15,6 +15,7 @@
, tp.project_id
, tp.pathway_source_id
, tp.pathway_name
+ , tp.org_abbrev
, pr.reaction_source_id
, pr.enzyme
, pr.expasy_url
From 7fc4ad7cc36e038f7d51d68b4e032cfda0335d66 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 13:24:33 -0400
Subject: [PATCH 076/112] debug
---
Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
index 213386f1b0..ca73bc0469 100644
--- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
+++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
@@ -33,9 +33,9 @@
AND tp.wildcard_count_gene <= tp.wildcard_count_pathway
AND pr.enzyme != '-.-.-.-'
AND tp.org_abbrev = ':ORG_ABBREV'
- GROUP BY tp.gene_source_id, tp.project_id, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
+ GROUP BY tp.gene_source_id, tp.project_id, tp.org_abbrevtp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
) t
- GROUP BY gene_source_id, project_id, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match
+ GROUP BY gene_source_id, project_id, org_abbrev, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match
) t2
ORDER BY pathway_source, lower(pathway_name)
From 90c0ff88b27e4f47ec3ade6264796954c58c6710 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 13:30:40 -0400
Subject: [PATCH 077/112] debug
---
Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
index ca73bc0469..21041cb21c 100644
--- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
+++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
@@ -33,7 +33,7 @@
AND tp.wildcard_count_gene <= tp.wildcard_count_pathway
AND pr.enzyme != '-.-.-.-'
AND tp.org_abbrev = ':ORG_ABBREV'
- GROUP BY tp.gene_source_id, tp.project_id, tp.org_abbrevtp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
+ GROUP BY tp.gene_source_id, tp.project_id, tp.org_abbrev, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
) t
GROUP BY gene_source_id, project_id, org_abbrev, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match
) t2
From c93978e3edaab4a48ce8aa6ddf31c986b6a18427 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 23 May 2025 16:40:07 -0400
Subject: [PATCH 078/112] debug
---
Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
index 1f0e0f78e9..ef804fdad7 100644
--- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
@@ -215,5 +215,5 @@
drop table :SCHEMA.:ORG_ABBREVGoTermList_tmp;
drop table :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp;
-drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp;
---drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp;
+drop table :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp;
+--drop table :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp;
From 376f8bab6e3844fa87049750d305a153089301ea Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 27 May 2025 13:51:10 -0400
Subject: [PATCH 079/112] add TaxonTree
---
Model/lib/psql/webready/global/TaxonTree.psql | 7 +++++++
.../lib/psql/webready/global/TaxonTree_ix.psql | 2 ++
.../psql/webready/orgSpecific/Taxonomy.psql | 18 +++++-------------
3 files changed, 14 insertions(+), 13 deletions(-)
create mode 100644 Model/lib/psql/webready/global/TaxonTree.psql
create mode 100644 Model/lib/psql/webready/global/TaxonTree_ix.psql
diff --git a/Model/lib/psql/webready/global/TaxonTree.psql b/Model/lib/psql/webready/global/TaxonTree.psql
new file mode 100644
index 0000000000..d09a10167a
--- /dev/null
+++ b/Model/lib/psql/webready/global/TaxonTree.psql
@@ -0,0 +1,7 @@
+ CREATE TABLE :SCHEMA.TaxonTree AS  -- each taxon paired with its scientific name
+ SELECT tax.taxon_id,
+        tax.parent_id,
+        tax.ncbi_tax_id,
+        CAST(tn.name AS varchar(80)) AS name, CAST(tax.rank AS varchar(24)) AS rank
+ FROM sres.Taxon tax INNER JOIN sres.TaxonName tn ON tn.taxon_id = tax.taxon_id
+ WHERE tn.name_class = 'scientific name'
diff --git a/Model/lib/psql/webready/global/TaxonTree_ix.psql b/Model/lib/psql/webready/global/TaxonTree_ix.psql
new file mode 100644
index 0000000000..4e31478a22
--- /dev/null
+++ b/Model/lib/psql/webready/global/TaxonTree_ix.psql
@@ -0,0 +1,2 @@
+ create index taxontree_idx  -- covers taxon_id then name
+     on :SCHEMA.taxontree (taxon_id, name);
diff --git a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
index 87f0a5c1d9..66a639f094 100644
--- a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
+++ b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
@@ -1,21 +1,13 @@
:CREATE_AND_POPULATE
WITH RECURSIVE cte AS (
- WITH tax AS(
- SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id,
- cast(tn.name as varchar(80)) as name,
- cast(t.rank as varchar(24)) as rank
- FROM sres.Taxon t, sres.TaxonName tn
- WHERE t.taxon_id = tn.taxon_id
- AND tn.name_class = 'scientific name'
- )
SELECT tax.*, name as organism, ARRAY[taxon_id::numeric] as path
- FROM tax
+ FROM :SCHEMA.taxontree
WHERE taxon_id = :TAXON_ID
UNION
- SELECT tax.*, cte.organism, cte.path || tax.taxon_id as path
- FROM tax, cte
- WHERE cte.parent_id = tax.taxon_id
- AND tax.name != 'root'
+ SELECT tt.*, cte.organism, cte.path || tt.taxon_id as path
+ FROM :SCHEMA.taxontree tt, cte
+ WHERE cte.parent_id = tt.taxon_id
+ AND tt.name != 'root'
)
SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over() as orderNum,
':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
From c1ca624e3e5f7c20b2df9718e4f90258bd4b3bb9 Mon Sep 17 00:00:00 2001
From: bindu
Date: Wed, 28 May 2025 08:56:22 -0400
Subject: [PATCH 080/112] fix SQL, as in #8aca696
---
.../lib/psql/webready/orgSpecific/TranscriptPathway.psql | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
index c585c753db..3280209fa6 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
@@ -1,9 +1,7 @@
:CREATE_AND_POPULATE
WITH transcript_ec AS (
SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
- -- CHECK AND FIX
- -- regexp_count( ec.ec_number, '-') as wildcard_count
- count( ec.ec_number) as wildcard_count
+ regexp_count( ec.ec_number, '-') as wildcard_count
FROM sres.EnzymeClass ec
WHERE enzyme_class_id IN (SELECT aseqEc.enzyme_class_id
FROM dots.AaSequenceEnzymeClass aseqEc, dots.aasequence seq
@@ -20,9 +18,7 @@
),
pathway_ec AS (
SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
- -- CHECK AND FIX
- -- regexp_count( ec.ec_number, '-') as wildcard_count
- count( ec.ec_number) as wildcard_count
+ regexp_count( ec.ec_number, '-') as wildcard_count
FROM sres.EnzymeClass ec
WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM pathway_node_ec)
GROUP BY ec.enzyme_class_id
From f9fabd48dc5ee9de854e8870ca740b031bfe5f9e Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Wed, 28 May 2025 11:18:20 -0400
Subject: [PATCH 081/112] add group tables
---
.../comparative/GroupDomainDescriptions.psql | 20 ++++
.../GroupDomainDescriptions_ix.psql | 2 +
.../comparative/ProteinDomainAssignment.psql | 38 ++++++++
.../ProteinDomainAssignment_ix.psql | 11 +++
.../comparative/ProteinSequenceGroup.psql | 92 +++++++++++++++++++
.../comparative/ProteinSequenceGroup_ix.sql | 15 +++
6 files changed, 178 insertions(+)
create mode 100644 Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql
create mode 100644 Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql
create mode 100644 Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql
create mode 100644 Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql
create mode 100644 Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql
create mode 100644 Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql
diff --git a/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql b/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql
new file mode 100644
index 0000000000..f5d73f6208
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql
@@ -0,0 +1,20 @@
+create table :SCHEMA.GroupDomainDescriptions as  -- per ortholog group, its top-ranked domain accessions with protein counts
+SELECT og.group_id AS group_name, ag.descriptions
+FROM apidb.OrthologGroup og,
+     (SELECT group_name,
+             STRING_AGG(accession ||' (' || num_proteins|| ')', ', ' ORDER BY num_proteins DESC, accession) AS descriptions
+      FROM (SELECT group_name, accession, num_proteins, rnk
+            FROM (SELECT group_name, accession, num_proteins,
+                         rank() OVER (PARTITION BY group_name ORDER BY num_proteins DESC) rnk
+                  FROM (SELECT group_name, accession, count(distinct full_id) AS num_proteins
+                        FROM :SCHEMA.ProteinDomainAssignment
+                        GROUP BY group_name,accession
+                       ) domain_counts  -- derived tables are aliased because PostgreSQL before 16 rejects an unnamed subquery in FROM
+                 ) ranked
+            WHERE rnk <= 3  -- rank() keeps ties, so a group can list more than three accessions
+           ) top_domains
+      GROUP BY group_name
+      ORDER BY group_name
+     ) ag
+WHERE og.group_id = ag.group_name
+;
diff --git a/Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql b/Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql
new file mode 100644
index 0000000000..27ed2e9f4f
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql
@@ -0,0 +1,2 @@
+CREATE INDEX GroupDomainDescriptions_idx ON :SCHEMA.GroupDomainDescriptions (group_name)  -- index named after the table it belongs to
+;
\ No newline at end of file
diff --git a/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql b/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql
new file mode 100644
index 0000000000..ce18928333
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql
@@ -0,0 +1,38 @@
+ create table :SCHEMA.ProteinDomainAssignment as  -- PFAM rows from the InterPro results for every protein in ProteinSequenceGroup
+   select sa.full_id, sa.group_name,
+          r.interpro_primary_id as accession,
+          r.interpro_desc as description,
+          CAST (NULL as NUMERIC) as domain_index,  -- placeholder, filled in by the update further down
+          sa.aa_sequence_id,
+          r.interpro_start_min as start_min,
+          r.interpro_end_min as end_max  -- NOTE(review) end_max is fed from interpro_end_min, confirm that is the intended column
+   from :SCHEMA.ProteinSequenceGroup sa, apidb.interproresults r
+   where sa.full_id = r.protein_source_id
+     and upper(r.interpro_db_name) = 'PFAM'
+;
+
+ create index domain_accession_ix  -- accession-leading index, built before the per-accession update below
+   on :SCHEMA.ProteinDomainAssignment (accession, full_id, group_name)
+ ;
+
+ drop table if exists :SCHEMA.domainIndex_tmp
+;
+
+ create table :SCHEMA.domainIndex_tmp as  -- numbers the distinct accessions from 1 upward in accession order
+   select row_number() OVER (ORDER BY accession) as domain_index, accession
+   from (select distinct accession
+         from :SCHEMA.ProteinDomainAssignment
+        ) distinct_accessions
+;
+
+ create index domainIdxIdx on :SCHEMA.DomainIndex_tmp(accession, domain_index)
+;
+
+ update :SCHEMA.ProteinDomainAssignment da  -- no WHERE clause on purpose, every row receives the index of its accession
+ set domain_index = (select domain_index
+                     from :SCHEMA.DomainIndex_tmp
+                     where accession = da.accession)
+;
+
+ drop table :SCHEMA.domainIndex_tmp
+ ;
\ No newline at end of file
diff --git a/Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql b/Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql
new file mode 100644
index 0000000000..7fe3cdc3c9
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql
@@ -0,0 +1,11 @@
+ CREATE INDEX domain_ix_ix  -- leading column domain_index
+     ON :SCHEMA.ProteinDomainAssignment (domain_index, accession, full_id)
+;
+
+ CREATE INDEX domain_group_ix  -- leading column group_name
+     ON :SCHEMA.ProteinDomainAssignment (group_name, accession, full_id)
+;
+
+ CREATE INDEX domain_seq_ix  -- leading column aa_sequence_id
+     ON :SCHEMA.ProteinDomainAssignment (aa_sequence_id, accession, full_id, group_name)
+;
\ No newline at end of file
diff --git a/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql b/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql
new file mode 100644
index 0000000000..bfdd39fcc1
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql
@@ -0,0 +1,92 @@
+ create table :SCHEMA.ProteinSequenceGroup as  -- protein sequences joined to their ortholog group, taxon details and source links
+   SELECT
+     aas.source_id AS full_id,
+     aas.source_id,
+     aas.aa_sequence_id,
+     length(aas.sequence) as length,
+     aas.description AS product,
+     aas.taxon_id,
+
+     taxon.orthomcl_taxon_id,
+     taxon.taxon_group,
+     taxon.orthomcl_abbrev AS taxon_abbreviation,
+     taxon.name AS organism_name,
+     taxon.core_peripheral,
+
+     o.group_id AS group_name,
+     o.ortholog_group_id,
+     o.number_of_members AS group_size,
+     o.number_of_core_members,
+     o.number_of_peripheral_members,
+     CASE is_residual WHEN 1 THEN 'Residual'
+                      ELSE 'Core' END AS group_type,
+     urls.source_url,
+     urls.source_text
+   FROM
+     dots.AASequence aas,
+     apidb.orthologGroup o,
+     apidb.orthologGroupAASequence ogseq,
+     (  -- taxon, each organism with the clade attributes matched on its three letter abbreviation
+       SELECT o.orthomcl_abbrev,
+              o.taxon_id as orthomcl_taxon_id,
+              t.name,
+              t.core_peripheral,
+              t.taxon_group
+       FROM apidb.organism o,
+            (WITH RECURSIVE TaxonHierarchy AS (  -- walks down from the named top-level clades, carrying the top-level name as taxon_group
+               SELECT
+                 three_letter_abbrev,
+                 orthomcl_clade_id,
+                 name,
+                 core_peripheral,
+                 name AS taxon_group,
+                 parent_id
+               FROM apidb.OrthomclClade
+               WHERE name IN ('Archaea', 'Bacteria', 'Alveolates', 'Amoebozoa', 'Euglenozoa',
+                              'Fungi', 'Metazoa', 'Other Eukaryota', 'Viridiplantae')
+               UNION ALL
+               SELECT
+                 child.three_letter_abbrev,
+                 child.orthomcl_clade_id,
+                 child.name,
+                 child.core_peripheral,
+                 parent.taxon_group,
+                 child.parent_id
+               FROM apidb.OrthomclClade child
+               JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+             )
+             SELECT three_letter_abbrev, taxon_group, name, core_peripheral
+             FROM TaxonHierarchy
+             WHERE core_peripheral IN ('C', 'P')
+            ) t
+       WHERE t.three_letter_abbrev = o.orthomcl_abbrev
+     ) taxon,
+
+     (  -- urls, a link and a display label per sequence, derived from the resource registered for its taxon
+       SELECT aas.aa_sequence_id,
+              CASE
+                WHEN ores.resource_name IN ('AmoebaDB','CryptoDB','FungiDB','GiardiaDB','HostDB','MicrosporidiaDB',
+                                            'PlasmoDB','PiroplasmaDB','ToxoDB','TrichDB','TriTrypDB','VectorBase')
+                THEN SUBSTR(ores.resource_url, 1, strpos(ores.resource_url, '/downloads')) || 'record/gene/'
+                     || aas.source_id  -- SUBSTR starts at 1 so the prefix keeps the slash found by strpos (a start of 0 drops it in PostgreSQL)
+                WHEN ores.resource_name = 'Uniprot'
+                THEN SUBSTR(ores.resource_url, 1, strpos(ores.resource_url, '/proteomes') ) || 'uniprot/'
+                     || aas.source_id  -- same start position fix as the branch above
+                ELSE '' END AS source_url,
+              CASE WHEN ores.resource_name IS NULL THEN ''
+                   ELSE aas.source_id || ' (' || ores.resource_name || ')' END AS source_text
+       FROM dots.AaSequence aas,
+            apidb.organism ot,
+            apidb.orthomclresource ores
+       WHERE ot.taxon_id = ores.orthomcl_taxon_id
+         AND ot.taxon_id = aas.taxon_id) urls
+   WHERE aas.aa_sequence_id = ogseq.aa_sequence_id
+     AND ogseq.group_id = o.group_id
+     AND aas.aa_sequence_id = urls.aa_sequence_id
+     AND taxon.orthomcl_taxon_id = aas.taxon_id
+     AND aas.taxon_id in (select distinct(eas.taxon_id) from apidb.organism og, dots.aasequence eas where eas.taxon_id = og.taxon_id)
+;
+
+ alter table :SCHEMA.ProteinSequenceGroup
+   add constraint SeqAttrs_pk primary key (full_id)  -- fails if the joins above yield more than one row per source_id
+;
\ No newline at end of file
diff --git a/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql b/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql
new file mode 100644
index 0000000000..4a136d10f4
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql
@@ -0,0 +1,15 @@
+ CREATE UNIQUE INDEX PSG_idx ON :SCHEMA.ProteinSequenceGroup (full_id, group_name, taxon_id, source_id)
+;
+
+ CREATE UNIQUE INDEX PSG_gusIdx ON :SCHEMA.ProteinSequenceGroup (ortholog_group_id, aa_sequence_id)
+;
+
+ CREATE UNIQUE INDEX PSG_idx2 ON :SCHEMA.ProteinSequenceGroup (group_name, length DESC, full_id, taxon_id)
+;
+
+ CREATE UNIQUE INDEX PSG_idx3  -- leading column aa_sequence_id
+     ON :SCHEMA.ProteinSequenceGroup (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id)
+ ;
+
+ CREATE UNIQUE INDEX PSG_idx4 ON :SCHEMA.ProteinSequenceGroup (source_id, full_id, group_name, taxon_id)
+;
From 399b744d87de9c2683e88d361ccfdfbbea6845b2 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Wed, 28 May 2025 12:44:31 -0400
Subject: [PATCH 082/112] fix taxonomy
---
Model/lib/psql/webready/orgSpecific/Taxonomy.psql | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
index 66a639f094..e0c1b7a037 100644
--- a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
+++ b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
@@ -1,7 +1,7 @@
:CREATE_AND_POPULATE
WITH RECURSIVE cte AS (
- SELECT tax.*, name as organism, ARRAY[taxon_id::numeric] as path
- FROM :SCHEMA.taxontree
+ SELECT tt.*, name as organism, ARRAY[taxon_id::numeric] as path
+ FROM :SCHEMA.taxontree tt
WHERE taxon_id = :TAXON_ID
UNION
SELECT tt.*, cte.organism, cte.path || tt.taxon_id as path
From a8afab3ce7b006f4bf080c85ec99166e3b34fb08 Mon Sep 17 00:00:00 2001
From: bindu
Date: Mon, 12 May 2025 17:16:31 -0400
Subject: [PATCH 083/112] fix sql as REGEXP_LIKE and REGEXP_COUNT are
available in pg
---
Model/lib/wdk/model/records/geneAttributeQueries.xml | 4 +---
Model/lib/wdk/model/records/transcriptAttributeQueries.xml | 4 +---
2 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/Model/lib/wdk/model/records/geneAttributeQueries.xml b/Model/lib/wdk/model/records/geneAttributeQueries.xml
index 7f7472cb24..3d69efb4c8 100644
--- a/Model/lib/wdk/model/records/geneAttributeQueries.xml
+++ b/Model/lib/wdk/model/records/geneAttributeQueries.xml
@@ -245,11 +245,9 @@ GROUP BY source_id
]]>
-
-
diff --git a/Model/lib/wdk/model/records/transcriptAttributeQueries.xml b/Model/lib/wdk/model/records/transcriptAttributeQueries.xml
index 388f9f234a..594f09496b 100644
--- a/Model/lib/wdk/model/records/transcriptAttributeQueries.xml
+++ b/Model/lib/wdk/model/records/transcriptAttributeQueries.xml
@@ -348,11 +348,9 @@
'TOXO'
]]>
-
-
From 2f384f30a7f09beb1d6b236f48e195b82e2bfe8f Mon Sep 17 00:00:00 2001
From: bindu
Date: Wed, 14 May 2025 13:58:31 -0400
Subject: [PATCH 084/112] uncomment TranscriptGenomicSequence
---
Model/lib/xml/tuningManager/apiTuningManager.xml | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml
index 6a783fcb6d..bd5806a592 100644
--- a/Model/lib/xml/tuningManager/apiTuningManager.xml
+++ b/Model/lib/xml/tuningManager/apiTuningManager.xml
@@ -2582,11 +2582,10 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id)
-
The genomic sequence of each transcript. Used in the transcript record / gene record page.
- - COMMENTING OUT DEPENDENCIES FOR NOW
@@ -2712,7 +2711,7 @@ sub readClob {
--->
+
From 8ac19101b8cd78badbdd5834e8a4c49a9ff715fe Mon Sep 17 00:00:00 2001
From: bindu
Date: Wed, 14 May 2025 20:50:13 -0400
Subject: [PATCH 085/112] fix dependency for TranscriptGenomicSequence
---
Model/lib/xml/tuningManager/apiTuningManager.xml | 2 --
1 file changed, 2 deletions(-)
diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml
index bd5806a592..b5b854f82d 100644
--- a/Model/lib/xml/tuningManager/apiTuningManager.xml
+++ b/Model/lib/xml/tuningManager/apiTuningManager.xml
@@ -2582,7 +2582,6 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id)
-
The genomic sequence of each transcript. Used in the transcript record / gene record page.
@@ -2590,7 +2589,6 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id)
-
Date: Thu, 15 May 2025 13:43:01 -0400
Subject: [PATCH 086/112] Fix SQL for 3D structure predictions section on gene
record page
---
.../wdk/model/records/geneTableQueries.xml | 43 ++++++++-----------
1 file changed, 19 insertions(+), 24 deletions(-)
diff --git a/Model/lib/wdk/model/records/geneTableQueries.xml b/Model/lib/wdk/model/records/geneTableQueries.xml
index 16928d821b..583236c5bf 100644
--- a/Model/lib/wdk/model/records/geneTableQueries.xml
+++ b/Model/lib/wdk/model/records/geneTableQueries.xml
@@ -3759,30 +3759,25 @@ from apidbTuning.AllGeneProducts
From 4662e325ca4eba08672bc93c990124ebd37d3670 Mon Sep 17 00:00:00 2001
From: Paul Wilkinson
Date: Fri, 16 May 2025 06:03:04 -0400
Subject: [PATCH 087/112] Edited SQL to postgres to fix scRNA-Seq section on
gene record page
---
.../wdk/model/records/geneTableQueries.xml | 34 ++++++++-----------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/Model/lib/wdk/model/records/geneTableQueries.xml b/Model/lib/wdk/model/records/geneTableQueries.xml
index 583236c5bf..bcdf99863c 100644
--- a/Model/lib/wdk/model/records/geneTableQueries.xml
+++ b/Model/lib/wdk/model/records/geneTableQueries.xml
@@ -1762,25 +1762,21 @@ from (
From d17270224ceed9a52eee3a2f9798e92b231ea834 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Wed, 28 May 2025 14:47:34 -0400
Subject: [PATCH 088/112] debug
---
Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
index ef804fdad7..77068d1b49 100644
--- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
@@ -169,7 +169,7 @@
LEFT JOIN :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp go ON tas.aa_sequence_id = go.aa_sequence_id
LEFT JOIN (
SELECT aa_sequence_id, string_agg(peptide_sequence, ', ') peptide_sequence
- FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :SCHEMA.:ORG_ABBREVSignalPeptideDomains) t
+ FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :SCHEMA.SignalPeptideDomains where org_abbrev = ':ORG_ABBREV') t
GROUP BY aa_sequence_id
) sigp ON tas.aa_sequence_id = sigp.aa_sequence_id
LEFT JOIN (
@@ -207,7 +207,7 @@
- update :SCHEMA.:ORG_ABBREVProteinAttributes gaup
+ update :SCHEMA.ProteinAttributes_:ORG_ABBREV gaup
set has_seqedit = 1
where source_id in (select source_id from apidb.seqedit)
From 783a97d6c8844ecf88192b3a01b91382db1e1cd1 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Wed, 28 May 2025 15:44:37 -0400
Subject: [PATCH 089/112] debug
---
Model/lib/psql/webready/comparative/AlphaFoldGenes.psql | 2 +-
.../lib/psql/webready/comparative/GroupPhylogeneticProfile.psql | 2 +-
Model/lib/psql/webready/comparative/OrthologousTranscripts.psql | 2 +-
.../lib/psql/webready/comparative/ProteinDomainAssignment.psql | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
index 2e9c2c7e34..56d3a8f373 100644
--- a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
+++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
@@ -51,7 +51,7 @@
;
drop table if exists :SCHEMA.minrank
-
+;
CREATE UNLOGGED TABLE :SCHEMA.minRank AS (
SELECT gene_source_id
, MIN(rank) as min_rank
diff --git a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
index ddb118b5e4..dcbb9370d4 100644
--- a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
+++ b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
@@ -3,7 +3,7 @@
SELECT rep.orthomcl_name, pp.profile_string
FROM apidb.PhylogeneticProfile pp,
(SELECT orthomcl_name, max(source_id) as source_id
- FROM :SCHEM.GeneAttributes
+ FROM :SCHEMA.GeneAttributes
GROUP BY orthomcl_name) rep
WHERE rep.source_id = pp.source_id
diff --git a/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql
index e4766f036f..983e5239c3 100644
--- a/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql
+++ b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql
@@ -35,7 +35,7 @@
and ota.taxon_id = o.taxon_id
),
syn_pairs
- as (select na_feature_id, syn_na_feature_id, 1 as is_syntenic from SyntenicPairs
+ as (select na_feature_id, syn_na_feature_id, 1 as is_syntenic from :SCHEMA.SyntenicPairs
)
select all_pairs.*
, coalesce(syn_pairs.is_syntenic, 0) as is_syntenic
diff --git a/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql b/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql
index ce18928333..5ba8d01290 100644
--- a/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql
+++ b/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql
@@ -11,7 +11,7 @@
and upper(r.interpro_db_name) = 'PFAM'
;
- create index domain_accession_ix&1
+ create index domain_accession_ix
on :SCHEMA.ProteinDomainAssignment (accession, full_id, group_name)
;
From 8b3a7bd55caa9ce1c2b4c495e0ef89ea151f4e48 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Wed, 28 May 2025 15:54:28 -0400
Subject: [PATCH 090/112] debug
---
Model/lib/psql/webready/comparative/AlphaFoldGenes.psql | 2 +-
.../lib/psql/webready/comparative/GroupDomainDescriptions.psql | 3 ++-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
index 56d3a8f373..b442e08705 100644
--- a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
+++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
@@ -92,6 +92,6 @@
;
drop table if exists :SCHEMA.uniprotgenes;
- drop table if exists :SCHEMA.minrank
+ drop table if exists :SCHEMA.minrank;
drop table if exists :SCHEMA.alphafoldhits;
diff --git a/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql b/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql
index f5d73f6208..919cb8a61a 100644
--- a/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql
+++ b/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql
@@ -17,4 +17,5 @@ FROM apidb.OrthologGroup og,
ORDER BY 1
) ag
WHERE og.group_id = ag.group_name
-)
+
+;
\ No newline at end of file
From a60fd41f47368f1ce3171e5e3ee91b78a8011c64 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 29 May 2025 11:34:17 -0400
Subject: [PATCH 091/112] debug
---
Model/lib/psql/webready/comparative/AlphaFoldGenes.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
index b442e08705..a0702a3dfa 100644
--- a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
+++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql
@@ -1,6 +1,6 @@
drop table if exists :SCHEMA.uniprotgenes;
- CREATE TABLE :SCHEMA.uniprotGenes AS
+ CREATE UNLOGGED TABLE :SCHEMA.uniprotGenes AS
SELECT DISTINCT ed.name
, d.*
, edr.version
From c38468993b29473e8bdb4880b5e8c161ab63f5e4 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 29 May 2025 17:05:22 -0400
Subject: [PATCH 092/112] fix taxonid bug
---
Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql | 2 +-
Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql | 2 +-
Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
index 29964ad4b5..09f86475ac 100644
--- a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
@@ -63,7 +63,7 @@
WHERE
sequence.taxon_id = taxon.taxon_id
AND sequence.sequence_ontology_id = so.ontology_term_id
- AND (sequence.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
+ AND (sequence.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0)
AND so.name IN ('random_sequence', 'chromosome', 'contig', 'supercontig','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle')
ORDER BY organism, source_id
diff --git a/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql b/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql
index 6da04c90fb..e5b60d5e59 100644
--- a/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql
+++ b/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql
@@ -13,7 +13,7 @@
1 AS edge_level
FROM dots.SequencePiece sp, dots.NaSequence ns
WHERE sp.piece_na_sequence_id = ns.na_sequence_id
- AND (ns.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
+ AND (ns.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0)
;
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
index abe0104ceb..1e47c1624b 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
@@ -204,7 +204,7 @@
) olds ON gf.na_feature_id = olds.na_feature_id
WHERE nl.is_top_level = 1
AND nl.feature_type = 'GeneFeature'
- AND (gsa.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
+ AND (gsa.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0)
AND species_name.name_class = 'scientific name'
AND (gf.is_predicted != 1 OR gf.is_predicted is null)
AND tn.name_class = 'scientific name'
From 98410cb898024711bc2b5ba412b166d72d350080 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Mon, 2 Jun 2025 11:12:37 -0400
Subject: [PATCH 093/112] debug
---
Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
index 77068d1b49..edf94bd30e 100644
--- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
@@ -207,9 +207,10 @@
- update :SCHEMA.ProteinAttributes_:ORG_ABBREV gaup
+ update :SCHEMA.ProteinAttributes gaup
set has_seqedit = 1
where source_id in (select source_id from apidb.seqedit)
+ and org_abbrev = ':ORG_ABBREV'
;
From a37bae33f0fcee69cec834d86f925c39ce3e1712 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Mon, 2 Jun 2025 12:40:57 -0400
Subject: [PATCH 094/112] debug
---
Model/lib/psql/webready/orgSpecific/GeneId.psql | 4 ++--
.../lib/psql/webready/orgSpecific/GeneIntronJunction.psql | 8 ++++----
.../lib/psql/webready/orgSpecific/ProteinAttributes.psql | 2 +-
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/GeneId.psql b/Model/lib/psql/webready/orgSpecific/GeneId.psql
index 315917f872..ec39a78b71 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneId.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneId.psql
@@ -13,7 +13,7 @@ create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureTmp as
)
;
-create index GeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id)
+create index :ORG_ABBREV_GeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id)
;
@@ -261,7 +261,7 @@ create index GeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp
- CREATE UNIQUE INDEX gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id)
+ CREATE UNIQUE INDEX :ORG_ABBREV_gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id)
;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql
index 1f42b544b4..9cf983cd01 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql
@@ -29,7 +29,7 @@
- CREATE UNIQUE INDEX annottmpnew_pk_ix ON annotgij (na_sequence_id,start_min,end_max,is_reversed,feature_type)
+ CREATE UNIQUE INDEX :ORG_ABBREV_annottmpnew_pk_ix ON annotgij (na_sequence_id,start_min,end_max,is_reversed,feature_type)
;
@@ -50,7 +50,7 @@
- CREATE UNIQUE INDEX gnattidloc_pk_ix ON GeneIdLocGIJ (na_sequence_id,start_min,is_reversed,end_max,na_feature_id,source_id,total_expression)
+ CREATE UNIQUE INDEX :ORG_ABBREV_gnattidloc_pk_ix ON GeneIdLocGIJ (na_sequence_id,start_min,is_reversed,end_max,na_feature_id,source_id,total_expression)
;
@@ -82,7 +82,7 @@
- create index gnidloc_nafid_ix on GeneIdLocGIJ (na_feature_id)
+ create index :ORG_ABBREV_gnidloc_nafid_ix on GeneIdLocGIJ (na_feature_id)
;
@@ -183,7 +183,7 @@
- create index gijtmp_gnscid_ix on gijtmp (gene_source_id)
+ create index :ORG_ABBREV_gijtmp_gnscid_ix on gijtmp (gene_source_id)
;
diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
index edf94bd30e..14be4666d1 100644
--- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql
@@ -80,7 +80,7 @@
- create index ProteinGoAttr_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id)
+ create index ProteinGoAttr_aaSequenceId_:ORG_ABBREV ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id)
;
From b6e139b8c21102e691b5d7fc8f892e8d4d4cbc46 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Mon, 2 Jun 2025 19:51:05 -0400
Subject: [PATCH 095/112] remove ortho .psql
---
.../webready/global/GroupDomainAttribute.psql | 24 -----
.../global/GroupDomainAttribute_ix.psql | 2 -
.../webready/global/SequenceAttributes.psql | 100 ------------------
.../global/SequenceAttributes_ix.psql | 31 ------
4 files changed, 157 deletions(-)
delete mode 100644 Model/lib/psql/webready/global/GroupDomainAttribute.psql
delete mode 100644 Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql
delete mode 100644 Model/lib/psql/webready/global/SequenceAttributes.psql
delete mode 100644 Model/lib/psql/webready/global/SequenceAttributes_ix.psql
diff --git a/Model/lib/psql/webready/global/GroupDomainAttribute.psql b/Model/lib/psql/webready/global/GroupDomainAttribute.psql
deleted file mode 100644
index e9f869535b..0000000000
--- a/Model/lib/psql/webready/global/GroupDomainAttribute.psql
+++ /dev/null
@@ -1,24 +0,0 @@
-CREATE TABLE SCHEMA.GroupDomainAttribute AS
-(
-SELECT og.group_id AS group_name, ag.descriptions
-FROM apidb.OrthologGroup og,
- (SELECT group_name,
- STRING_AGG(accession ||' (' || num_proteins|| ')', ', ') AS descriptions
- FROM (SELECT group_name, accession, num_proteins, rnk
- FROM (SELECT group_name, accession, num_proteins,
- rank() OVER (PARTITION BY group_name ORDER BY num_proteins DESC) rnk
- FROM (SELECT group_name, accession, count(distinct full_id) AS num_proteins
- FROM SCHEMA.DomainAssignment
- GROUP BY group_name,accession
- )
- )
- WHERE rnk <= 3
- )
- GROUP BY group_name
- ORDER BY 1
- ) ag
-WHERE og.group_id = ag.group_name
-)
-;
-
-
diff --git a/Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql b/Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql
deleted file mode 100644
index b796bb25e4..0000000000
--- a/Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql
+++ /dev/null
@@ -1,2 +0,0 @@
-CREATE INDEX GroupDomainAttribute_idx ON :SCHEMA.GroupDomainAttribute (group_name)
- ;
diff --git a/Model/lib/psql/webready/global/SequenceAttributes.psql b/Model/lib/psql/webready/global/SequenceAttributes.psql
deleted file mode 100644
index 77e8aedba8..0000000000
--- a/Model/lib/psql/webready/global/SequenceAttributes.psql
+++ /dev/null
@@ -1,100 +0,0 @@
-
-
-
- create table SequenceAttributes as
- SELECT
- aas.source_id AS full_id,
- aas.source_id,
- aas.aa_sequence_id,
- length(aas.sequence) as length,
- aas.description AS product,
- aas.taxon_id,
-
- taxon.orthomcl_taxon_id,
- taxon.taxon_group,
- taxon.orthomcl_abbrev AS taxon_abbreviation,
- taxon.name AS organism_name,
- taxon.core_peripheral,
-
- o.group_id AS group_name,
- o.ortholog_group_id,
- o.number_of_members AS group_size,
- o.number_of_core_members,
- o.number_of_peripheral_members,
- CASE is_residual WHEN 1 THEN 'Residual'
- ELSE 'Core' END AS group_type,
- urls.source_url,
- urls.source_text
- FROM
- dots.AASequence aas,
- apidb.orthologGroup o,
- apidb.orthologGroupAASequence ogseq,
- (
- SELECT o.orthomcl_abbrev,
- o.taxon_id as orthomcl_taxon_id,
- t.name,
- t.core_peripheral,
- t.taxon_group
- FROM apidb.organism o,
- (WITH RECURSIVE TaxonHierarchy AS (
- SELECT
- three_letter_abbrev,
- orthomcl_clade_id,
- name,
- core_peripheral,
- name AS taxon_group,
- parent_id
- FROM apidb.OrthomclClade
- WHERE name IN ('Archaea', 'Bacteria', 'Alveolates', 'Amoebozoa', 'Euglenozoa',
- 'Fungi', 'Metazoa', 'Other Eukaryota', 'Viridiplantae')
- UNION ALL
- SELECT
- child.three_letter_abbrev,
- child.orthomcl_clade_id,
- child.name,
- child.core_peripheral,
- parent.taxon_group,
- child.parent_id
- FROM apidb.OrthomclClade child
- JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
- )
- SELECT three_letter_abbrev, taxon_group, name, core_peripheral
- FROM TaxonHierarchy
- WHERE core_peripheral IN ('C', 'P')
- ) t
- WHERE t.three_letter_abbrev = o.orthomcl_abbrev
- ) taxon,
-
- (
- SELECT aas.aa_sequence_id,
- CASE
- WHEN ores.resource_name IN ('AmoebaDB','CryptoDB','FungiDB','GiardiaDB','HostDB','MicrosporidiaDB',
- 'PlasmoDB','PiroplasmaDB','ToxoDB','TrichDB','TriTrypDB','VectorBase')
- THEN SUBSTR(ores.resource_url, 0, strpos(ores.resource_url, '/downloads')) || 'record/gene/'
- || aas.source_id
- WHEN ores.resource_name = 'Uniprot'
- THEN SUBSTR(ores.resource_url, 0, strpos(ores.resource_url, '/proteomes') ) || 'uniprot/'
- || aas.source_id
- ELSE '' END AS source_url,
- CASE WHEN ores.resource_name IS NULL THEN ''
- ELSE aas.source_id || ' (' || ores.resource_name || ')' END AS source_text
- FROM dots.AaSequence aas,
- apidb.organism ot,
- apidb.orthomclresource ores
- WHERE ot.taxon_id = ores.orthomcl_taxon_id
- AND ot.taxon_id = aas.taxon_id) urls
- WHERE aas.aa_sequence_id = ogseq.aa_sequence_id
- AND ogseq.group_id = o.group_id
- AND aas.aa_sequence_id = urls.aa_sequence_id
- AND taxon.orthomcl_taxon_id = aas.taxon_id
- AND aas.taxon_id in (select distinct(eas.taxon_id) from apidb.organism og, dots.aasequence eas where eas.taxon_id = og.taxon_id)
-
- ;
-
-
-
- alter table :SCHEMA.SequenceAttributes
- add constraint SeqAttrs_pk primary key (full_id)
-
- ;
-
diff --git a/Model/lib/psql/webready/global/SequenceAttributes_ix.psql b/Model/lib/psql/webready/global/SequenceAttributes_ix.psql
deleted file mode 100644
index 88e66acdde..0000000000
--- a/Model/lib/psql/webready/global/SequenceAttributes_ix.psql
+++ /dev/null
@@ -1,31 +0,0 @@
-
-
- create unique index SeqAttrs_idx ON :SCHEMA.SequenceAttributes (full_id, group_name, taxon_id, source_id)
-
- ;
-
-
-
- create unique index SeqAttrs_gusIdx ON :SCHEMA.SequenceAttributes (ortholog_group_id, aa_sequence_id)
-
- ;
-
-
-
- create unique index SeqAttrs_idx2 ON :SCHEMA.SequenceAttributes (group_name, length desc, full_id, taxon_id)
-
- ;
-
-
-
- create unique index SeqAttrs_idx3
- on :SCHEMA.SequenceAttributes (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id)
-
- ;
-
-
-
- create unique index SeqAttrs_idx4 ON :SCHEMA.SequenceAttributes (source_id, full_id, group_name, taxon_id)
-
- ;
-
From 223b6609e0e123aa075ec449522f08908d9cc14a Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Tue, 3 Jun 2025 15:50:57 -0400
Subject: [PATCH 096/112] need to define taxon tree org specific to handle temp
taxon ids
---
.../psql/webready/orgSpecific/Taxonomy.psql | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
index e0c1b7a037..45422830bc 100644
--- a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
+++ b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql
@@ -1,11 +1,23 @@
+DROP TABLE IF EXISTS :SCHEMA.TaxonTree_:ORG_ABBREV;
+-- Per-organism scratch table of taxon rows (taxon id, parent id, NCBI tax id, scientific name, rank) read by the recursive query below and dropped once that query has run
+CREATE TABLE :SCHEMA.TaxonTree_:ORG_ABBREV as
+ SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id,
+ cast(tn.name as varchar(80)) as name,
+ cast(t.rank as varchar(24)) as rank
+ FROM sres.Taxon t, sres.TaxonName tn
+ WHERE t.taxon_id = tn.taxon_id
+ AND tn.name_class = 'scientific name' -- keep only the scientific name of each taxon
+ AND t.taxon_id = :TAXON_ID -- NOTE(review): this keeps only the row for the requested taxon, so the recursive parent_id walk below appears to find no ancestor rows in this table - confirm the filter is intended
+ ;
+
:CREATE_AND_POPULATE
WITH RECURSIVE cte AS (
SELECT tt.*, name as organism, ARRAY[taxon_id::numeric] as path
- FROM :SCHEMA.taxontree tt
+ FROM :SCHEMA.TaxonTree_:ORG_ABBREV tt
WHERE taxon_id = :TAXON_ID
UNION
SELECT tt.*, cte.organism, cte.path || tt.taxon_id as path
- FROM :SCHEMA.taxontree tt, cte
+ FROM :SCHEMA.TaxonTree_:ORG_ABBREV tt, cte
WHERE cte.parent_id = tt.taxon_id
AND tt.name != 'root'
)
@@ -13,3 +25,6 @@
':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date
FROM (SELECT cte.* FROM cte ORDER BY path) t
:DECLARE_PARTITION;
+
+
+DROP TABLE :SCHEMA.TaxonTree_:ORG_ABBREV;
From 9083af198f65e0274803d2ad70515e67ccebd277 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 3 Jun 2025 20:35:06 -0400
Subject: [PATCH 097/112] remove taxontree
---
Model/lib/psql/webready/global/TaxonTree.psql | 7 -------
Model/lib/psql/webready/global/TaxonTree_ix.psql | 2 --
2 files changed, 9 deletions(-)
delete mode 100644 Model/lib/psql/webready/global/TaxonTree.psql
delete mode 100644 Model/lib/psql/webready/global/TaxonTree_ix.psql
diff --git a/Model/lib/psql/webready/global/TaxonTree.psql b/Model/lib/psql/webready/global/TaxonTree.psql
deleted file mode 100644
index d09a10167a..0000000000
--- a/Model/lib/psql/webready/global/TaxonTree.psql
+++ /dev/null
@@ -1,7 +0,0 @@
- CREATE TABLE :SCHEMA.TaxonTree as
- SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id,
- cast(tn.name as varchar(80)) as name,
- cast(t.rank as varchar(24)) as rank
- FROM sres.Taxon t, sres.TaxonName tn
- WHERE t.taxon_id = tn.taxon_id
- AND tn.name_class = 'scientific name'
diff --git a/Model/lib/psql/webready/global/TaxonTree_ix.psql b/Model/lib/psql/webready/global/TaxonTree_ix.psql
deleted file mode 100644
index 4e31478a22..0000000000
--- a/Model/lib/psql/webready/global/TaxonTree_ix.psql
+++ /dev/null
@@ -1,2 +0,0 @@
- CREATE INDEX taxontree_idx ON :SCHEMA.taxontree (taxon_id, name)
- ;
From 9124a0db8266a415a120daf0e8891e4830fca7bc Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Tue, 3 Jun 2025 20:48:07 -0400
Subject: [PATCH 098/112] fix taxonspecies
---
.../webready/orgSpecific/TaxonSpecies.psql | 28 ++++++++++++++++---
1 file changed, 24 insertions(+), 4 deletions(-)
diff --git a/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql b/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql
index b7d15cbbdc..22bf73943a 100644
--- a/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql
+++ b/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql
@@ -1,10 +1,29 @@
+drop table if exists :SCHEMA.taxonOfInterest_:ORG_ABBREV;
+-- Scratch list of taxa to resolve to a species: the requested taxon itself plus the taxa of EST
+-- sequences whose external database name matches a datasource of that taxon (reference strains only)
+create unlogged table :SCHEMA.taxonOfInterest_:ORG_ABBREV as
+select distinct ens.taxon_id
+from dots.est e
+ inner join dots.ExternalNaSequence ens on ens.na_sequence_id = e.na_sequence_id
+ inner join sres.ExternalDatabaseRelease edr on edr.external_database_release_id = ens.external_database_release_id
+ inner join sres.externaldatabase ed on ed.external_database_id = edr.external_database_id
+ inner join sres.ontologyterm oterm on oterm.ontology_term_id = ens.sequence_ontology_id
+ inner join apidb.datasource ds on ds.name = ed.name
+ inner join apidb.organism o on o.taxon_id = ds.taxon_id
+where o.is_reference_strain = 1
+ and o.taxon_id = :TAXON_ID
+-- always include the requested taxon, even when the EST query above returns nothing
+union
+select :TAXON_ID
+;
+
-- recursively walk taxon tree to find ancestor with rank "species"
-- Update this to select max/min level with rank species if there are multiple
:CREATE_AND_POPULATE
WITH RECURSIVE cte AS (
SELECT TAXON_ID, taxon_id as parent_id, 1 as lvl
FROM sres.taxon
- WHERE taxon_id = :TAXON_ID
+ WHERE taxon_id IN (SELECT taxon_id from :SCHEMA.taxonOfInterest_:ORG_ABBREV)
UNION ALL
SELECT cte.taxon_id, sub.parent_id, lvl + 1
FROM cte, sres.taxon sub
@@ -16,8 +35,9 @@
current_timestamp as modification_date
FROM cte c, sres.taxon t
WHERE t.taxon_id = c.parent_id
- AND t.rank='species'
-
-
+ AND t.rank='species'
:DECLARE_PARTITION;
+drop table if exists :SCHEMA.taxonOfInterest_:ORG_ABBREV;
+
+
From 590b6b723a024de8b75796280332b5b8a333a95e Mon Sep 17 00:00:00 2001
From: John Brestelli
Date: Wed, 4 Jun 2025 15:39:04 -0400
Subject: [PATCH 099/112] add notes
---
Model/lib/xml/tuningManager/webtables.org | 234 +++++++++++++---------
1 file changed, 135 insertions(+), 99 deletions(-)
diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org
index d622483647..2a4be5495e 100644
--- a/Model/lib/xml/tuningManager/webtables.org
+++ b/Model/lib/xml/tuningManager/webtables.org
@@ -3,140 +3,176 @@
* MO Tables
- Organism
- - [X] OrganismAbbreviationBlast_ix.psql
- - [X] OrganismAbbreviationBlast.psql
+ - [X] OrganismAbbreviationBlast_ix
+ - [X] OrganismAbbreviationBlast
- move to KEEP
- - [X] OrganismAbbreviation_ix.psql
- - [X] OrganismAbbreviation.psql
+ - [X] OrganismAbbreviation_ix
+ - [X] OrganismAbbreviation
- updated the abbreviation field to name_for_filenames
- - [X] OrganismSelectTaxonRank_ix.psql
- - [X] OrganismSelectTaxonRank.psql
- - [X] Taxonomy_ix.psql
- - [X] Taxonomy.psql
- - [X] TaxonSpecies_ix.psql
- - [X] TaxonSpecies.psql
+ - [X] OrganismSelectTaxonRank_ix
+ - [X] OrganismSelectTaxonRank
+ - [X] Taxonomy_ix
+ - [X] Taxonomy
+ - [X] TaxonSpecies_ix
+ - [X] TaxonSpecies
- Genomic Sequence
- - [X] GenomicSequenceId_ix.psql
- - [X] GenomicSequenceId.psql
- - [X] GenomicSequenceSequence_ix.psql
- - [X] GenomicSequenceSequence.psql
+ - [X] GenomicSequenceId_ix
+ - [X] GenomicSequenceId
+ - [X] GenomicSequenceSequence_ix
+ - [X] GenomicSequenceSequence
- [X] SequencePieceClosure
- [X] GenomicSeqAttributes
- [s] SequenceEnzymeClass
- Temp remove this and eventually Move to ComparativeGenomics because it depends on the OrthoMCL Derived EC Numbers
- Transcript / Protein
- - [X] SignalPeptideDomains_ix.psql
- - [X] SignalPeptideDomains.psql
- - [X] TransmembraneDomains_ix.psql
- - [X] TransmembraneDomains.psql
- - [X] PdbSimilarity_ix.psql
- - [X] PdbSimilarity.psql
- - [X] ProteinSequence_ix.psql
- - [X] ProteinSequence.psql
- - [X] ProteinAttributes_ix.psql
- - [X] ProteinAttributes.psql
- - [ ] TranscriptAttributes_ix.psql
- - [ ] TranscriptAttributes.psql
+ - [X] SignalPeptideDomains_ix
+ - [X] SignalPeptideDomains
+ - [X] TransmembraneDomains_ix
+ - [X] TransmembraneDomains
+ - [X] PdbSimilarity_ix
+ - [X] PdbSimilarity
+ - [X] ProteinSequence_ix
+ - [X] ProteinSequence
+ - [ ] ProteinAttributes_ix
+ - [ ] Remove Derived EC Numbers
+ - [ ] New Tuning table(s) for EC Derived ECs per protein (gene and transcript)
+ - [X] ProteinAttributes
+ - [ ] TranscriptAttributes_ix
+ - [ ] TranscriptAttributes
- no longer has products column because that is done by TM
- - [X] CodingSequence_ix.psql
- - [X] CodingSequence.psql
- - [X] IntronUtrCoords_ix.psql
- - [X] IntronUtrCoords.psql
- - [X] TranscriptCenDistance_ix.psql
- - [X] TranscriptCenDistance.psql
- - [ ] TranscriptPathway_ix.psql
- - [ ] TranscriptPathway.psql
+ - no longer has derived ec numbers (move to comparative genomics)
+ - [X] CodingSequence_ix
+ - [X] CodingSequence
+ - [X] IntronUtrCoords_ix
+ - [X] IntronUtrCoords
+ - [X] TranscriptCenDistance_ix
+ - [X] TranscriptCenDistance
+ - [ ] TranscriptPathway_ix
+ - [ ] TranscriptPathway
- This may need to move to comparative genomics because we need the OrthoDerived EC mappings
- - [X] TranscriptSequence_ix.psql
- - [X] TranscriptSequence.psql
- - [X] ChIPchipTranscript_ix.psql
- - [X] ChIPchipTranscript.psql
+ - [X] TranscriptSequence_ix
+ - [X] TranscriptSequence
+ - [X] ChIPchipTranscript_ix
+ - [X] ChIPchipTranscript
- Gene
- - [X] GeneId_ix.psql
- - [X] GeneId.psql
- - [X] GeneAttributes_ix.psql
- - [X] GeneAttributes.psql
- - fix gene_product
- - [X] GeneCopyNumbers_ix.psql
- - [X] GeneCopyNumbers.psql
- - [X] GeneGoTable_ix.psql
- - [X] GeneGoTable.psql
- - [X] GeneGoTerms_ix.psql
- - [X] GeneGoTerms.psql
- - [X] GeneLocations_ix.psql
- - [X] GeneLocations.psql
- - [X] GeneModelDump_ix.psql
- - [X] GeneModelDump.psql
- - [X] GeneSummaryFilter_ix.psql
- - [X] GeneSummaryFilter.psql
- - [X] TFBSGene_ix.psql
- - [X] TFBSGene.psql
+ - [X] GeneId_ix
+ - [X] GeneId
+ - [X] GeneAttributes_ix
+ - [X] GeneAttributes
+ - remove gene_product and remove orthomclname
+ - [X] GeneCopyNumbers_ix
+ - [X] GeneCopyNumbers
+ - [X] GeneGoTable_ix
+ - [X] GeneGoTable
+ - [X] GeneGoTerms_ix
+ - [X] GeneGoTerms
+ - [X] GeneLocations_ix
+ - [X] GeneLocations
+ - [X] GeneModelDump_ix
+ - [X] GeneModelDump
+ - [X] GeneSummaryFilter_ix
+ - [X] GeneSummaryFilter
+ - [X] TFBSGene_ix
+ - [X] TFBSGene
- removed aef.*
- - [ ] PathwayNodeGene_ix.psql
+ - [ ] PathwayNodeGene_ix
- This may need to move to comparative genomics because we need the OrthoDerived EC mappings
- - [ ] PathwayNodeGene.psql
+ - [ ] PathwayNodeGene
- This may need to move to comparative genomics because we need the OrthoDerived EC mappings
- - [ ] PathwaysGeneTable_ix.psql
+ - [ ] PathwaysGeneTable_ix
- This may need to move to comparative genomics because we need the OrthoDerived EC mappings
- - [ ] PathwaysGeneTable.psql
+ - [ ] PathwaysGeneTable
- This may need to move to comparative genomics because we need the OrthoDerived EC mappings
- - [X] GoTermSummary_ix.psql
- - [X] GoTermSummary.psql
- - [X] EqtlSpan_ix.psql
- - [X] EqtlSpan.psql
+ - [X] GoTermSummary_ix
+ - [X] GoTermSummary
+ - [X] EqtlSpan_ix
+ - [X] EqtlSpan
- EST
- - [ ] EstAttributes_ix.psql
+ - [X] EstAttributes_ix
- move to comparative
- join to apidb.organism and filter by "is_reference_strain"
- - [ ] EstAttributes.psql
+ - [X] EstAttributes
- move to comparative
- join to apidb.organism and filter by "is_reference_strain"
- - [ ] EstSequence_ix.psql
+ - [X] EstSequence_ix
- move to comparative
- join to apidb.organism and filter by "is_reference_strain"
- - [ ] EstSequence.psql
+ - [X] EstSequence
- move to comparative
- join to apidb.organism and filter by "is_reference_strain"
- - [X] EstAlignmentGeneSummary_ix.psql
- - [X] EstAlignmentGeneSummary.psql
+ - [X] EstAlignmentGeneSummary_ix
+ - [X] EstAlignmentGeneSummary
- Dataset / Other
- - [X] DatasetExampleSourceId_ix.psql
- - [X] DatasetExampleSourceId.psql
+ - [X] DatasetExampleSourceId_ix
+ - [X] DatasetExampleSourceId
- NOTE: this depends on Profiles
- - [X] PANExtDBRls.psql
- - [X] PANIO.psql
- - [X] PANIO_ix.psql
+ - [X] PANExtDBRls
+ - [X] PANIO
+ - [X] PANIO_ix
- - [ ] +ProfileType_ix.psql+
- - [ ] +ProfileType.psql+
- - [ ] +Profile_ix.psql+
- - [ ] +Profile.psql+
- - [ ] +ProfileSamples_ix.psql+
- - [ ] +ProfileSamples.psql+
+ - [ ] +ProfileType_ix+
+ - [ ] +ProfileType+
+ - [ ] +Profile_ix+
+ - [ ] +Profile+
+ - [ ] +ProfileSamples_ix+
+ - [ ] +ProfileSamples+
- - [X] RnaSeqStats_ix.psql
- - [X] RnaSeqStats.psql
- - [X] OrganismAttributes_ix.psql
- - [X] OrganismAttributes.psql
+ - [X] RnaSeqStats_ix
+ - [X] RnaSeqStats
+ - [X] OrganismAttributes_ix
+ - [X] OrganismAttributes
- removed ESTs and SNPs
- - [X] ChrCopyNumbers_ix.psql
- - [X] ChrCopyNumbers.psql
+ - [X] ChrCopyNumbers_ix
+ - [X] ChrCopyNumbers
- Junctions (Kathryn)
- - [ ] IntronSupportLevel_ix.psql
- - [ ] IntronSupportLevel.psql
- - [ ] GeneIntJuncStats_ix.psql
- - [ ] GeneIntJuncStats.psql
- - [ ] GeneIntronJunction_ix.psql
- - [ ] GeneIntronJunction.psql
- - [ ] NameMappingGIJ_ix.psql
- - [ ] NameMappingGIJ.psql
- - needs to be in TM, depends on dataset presenters
- - [ ] GeneMaxIntronGIJ_ix.psql
+ - [ ] IntronSupportLevel_ix
+ - [ ] IntronSupportLevel
+ - [ ] GeneIntJuncStats_ix
+ - [ ] GeneIntJuncStats
+ - [ ] GeneIntronJunction_ix
+ - [ ] GeneIntronJunction
+ - [ ] NameMappingGIJ_ix
+ - [ ] NameMappingGIJ
+ - needs to be in TM, depends on dataset presenters
+ - [ ] GeneMaxIntronGIJ_ix
- should be aux table and dropped
- - [ ] GeneMaxIntronGIJ.psql
+ - [ ] GeneMaxIntronGIJ
- should be aux table and dropped
+
+- Comparative
+ - [ ] ProteinGroup (RENAMED from SEQUENCEATTRIBUTES)
+ - add orthomcl derived ec numbers here
+ - add gene_id
+ - add transcript_id
+ - ENSURE this has a row for every protein!
+ - [ ] ProteinGroupDomainAssignment (RENAMED from DOMAINASSIGNMENT)
+
+ - [ ] GroupDomainDescriptions (RENAMED from GROUPDOMAINATTRIBUTE )
+ - [ ] proteinGroupEnzymeClass (renamed from sequenceenzymeclass)
+ - [ ] AlphaFoldGenes
+ - [ ] GroupPhylogeneticProfile
+ - [ ] OrthologousTranscripts
+ - [ ] PhyleticPattern
+ - no longer uses dots.sequencegroup and dots.sequencesequencegroup
+
+- Global
+ - [X] CompoundAttributes
+ - [X] CompoundId
+ - [X] CompoundProperties
+ - [ ] CompoundTypeAheads
+ - double check again after database is rebuilt. may be ok
+ - [X] OntologyLevels
+ - [X] PathwayAttributes
+ - [X] PathwayCompounds
+ - [X] PathwayNodes
+ - [X] PathwayReactions
+
+- KEEP Tuning table
+ - GeneProduct (CHECK)
+ - TranscriptProduct (NEW)
+ - ProteinProduct (NEW??)
From 10002ed8d93a1ef7b113994953e105fea3e1a315 Mon Sep 17 00:00:00 2001
From: Richard Demko
Date: Thu, 5 Jun 2025 11:00:55 -0400
Subject: [PATCH 100/112] New GeneOrthologGroup and
TranscriptOrthologGroupTables
---
.../comparative/GeneOrthologGroup.psql | 10 ++++++++++
.../comparative/TranscriptOrthologGroup.psql | 10 ++++++++++
.../wdk/model/questions/params/geneParams.xml | 18 +++++++++---------
3 files changed, 29 insertions(+), 9 deletions(-)
create mode 100644 Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
create mode 100644 Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
new file mode 100644
index 0000000000..4c30361e4d
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
@@ -0,0 +1,10 @@
+ create table :SCHEMA.GeneOrthologGroup as -- maps each gene to the ortholog group of its protein(s)
+ SELECT DISTINCT pa.gene_source_id AS gene_id, -- DISTINCT collapses repeated gene/group pairs ahead of the gene_id primary key
+ ogas.group_id
+ FROM webready.proteinattributes pa
+ INNER JOIN apidb.orthologgroupaasequence ogas
+ ON pa.aa_sequence_id = ogas.aa_sequence_id
+;
+ alter table :SCHEMA.GeneOrthologGroup
+ add constraint GeneOrthologGroup_pk primary key (gene_id)
+;
diff --git a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
new file mode 100644
index 0000000000..5362c8465e
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
@@ -0,0 +1,10 @@
+ create table :SCHEMA.TranscriptOrthologGroup as -- maps each transcript to the ortholog group of its protein
+ SELECT ta.source_id,
+ ogas.group_id
+ FROM webready.transcriptattributes ta
+ INNER JOIN apidb.orthologgroupaasequence ogas
+ ON ta.aa_sequence_id = ogas.aa_sequence_id
+;
+ alter table :SCHEMA.TranscriptOrthologGroup
+ add constraint TranscriptOrthologGroup_pk primary key (source_id)
+;
diff --git a/Model/lib/wdk/model/questions/params/geneParams.xml b/Model/lib/wdk/model/questions/params/geneParams.xml
index 142a6d1b63..4635e82211 100644
--- a/Model/lib/wdk/model/questions/params/geneParams.xml
+++ b/Model/lib/wdk/model/questions/params/geneParams.xml
@@ -8274,16 +8274,16 @@ products of your selected type (or types).
SELECT three_letter_abbrev as term, name as internal, name as display
- FROM apidb.orthomcltaxon
+ FROM apidb.orthomclclade
WHERE three_letter_abbrev not in ('BACI')
ORDER BY depth_first_index ASC, three_letter_abbrev ASC
From 49e87f694113c5b7a6ae4e4cb3fc5a948616822f Mon Sep 17 00:00:00 2001
From: bindu
Date: Wed, 14 May 2025 13:58:31 -0400
Subject: [PATCH 101/112] uncomment TranscriptGenomicSequence
---
Model/lib/xml/tuningManager/apiTuningManager.xml | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml
index 6a783fcb6d..bd5806a592 100644
--- a/Model/lib/xml/tuningManager/apiTuningManager.xml
+++ b/Model/lib/xml/tuningManager/apiTuningManager.xml
@@ -2582,11 +2582,10 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id)
-
The genomic sequence of each transcript. Used in the transcript record / gene record page.
- - COMMENTING OUT DEPENDENCIES FOR NOW
@@ -2712,7 +2711,7 @@ sub readClob {
--->
+
From 8fc80129852b1b3f44fec5ad4c5b7f41fc17cff0 Mon Sep 17 00:00:00 2001
From: Richard Demko
Date: Thu, 5 Jun 2025 11:00:55 -0400
Subject: [PATCH 102/112] New GeneOrthologGroup and
TranscriptOrthologGroupTables
---
.../comparative/GeneOrthologGroup.psql | 10 ++++++++++
.../comparative/TranscriptOrthologGroup.psql | 10 ++++++++++
.../wdk/model/questions/params/geneParams.xml | 18 +++++++++---------
3 files changed, 29 insertions(+), 9 deletions(-)
create mode 100644 Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
create mode 100644 Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
new file mode 100644
index 0000000000..4c30361e4d
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
@@ -0,0 +1,10 @@
+ create table :SCHEMA.GeneOrthologGroup as -- maps each gene to the ortholog group of its protein(s)
+ SELECT DISTINCT pa.gene_source_id AS gene_id, -- DISTINCT collapses repeated gene/group pairs ahead of the gene_id primary key
+ ogas.group_id
+ FROM webready.proteinattributes pa
+ INNER JOIN apidb.orthologgroupaasequence ogas
+ ON pa.aa_sequence_id = ogas.aa_sequence_id
+;
+ alter table :SCHEMA.GeneOrthologGroup
+ add constraint GeneOrthologGroup_pk primary key (gene_id)
+;
diff --git a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
new file mode 100644
index 0000000000..5362c8465e
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
@@ -0,0 +1,10 @@
+ create table :SCHEMA.TranscriptOrthologGroup as -- maps each transcript to the ortholog group of its protein
+ SELECT ta.source_id,
+ ogas.group_id
+ FROM webready.transcriptattributes ta
+ INNER JOIN apidb.orthologgroupaasequence ogas
+ ON ta.aa_sequence_id = ogas.aa_sequence_id
+;
+ alter table :SCHEMA.TranscriptOrthologGroup
+ add constraint TranscriptOrthologGroup_pk primary key (source_id)
+;
diff --git a/Model/lib/wdk/model/questions/params/geneParams.xml b/Model/lib/wdk/model/questions/params/geneParams.xml
index 142a6d1b63..4635e82211 100644
--- a/Model/lib/wdk/model/questions/params/geneParams.xml
+++ b/Model/lib/wdk/model/questions/params/geneParams.xml
@@ -8274,16 +8274,16 @@ products of your selected type (or types).
SELECT three_letter_abbrev as term, name as internal, name as display
- FROM apidb.orthomcltaxon
+ FROM apidb.orthomclclade
WHERE three_letter_abbrev not in ('BACI')
ORDER BY depth_first_index ASC, three_letter_abbrev ASC
From 9d44208351a3362b09b40890dc01e3e9f249c0eb Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 5 Jun 2025 12:17:41 -0400
Subject: [PATCH 103/112] add convert2webready
---
convert2webready | 82 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 82 insertions(+)
create mode 100755 convert2webready
diff --git a/convert2webready b/convert2webready
new file mode 100755
index 0000000000..d705233c9d
--- /dev/null
+++ b/convert2webready
@@ -0,0 +1,82 @@
+#!/usr/bin/perl
+# Reads file names (relative to the source model dir) from STDIN and writes each file to the target model dir with apidbtuning.<table> rewritten to webready.<table> for the tables listed below.
+# usage: grep -irl apidbtuning | ~/sourceCode/website/ApiCommonModel/convert2webready ~/sourceCode/website/ApiCommonModel/Model ~/sourceCode/ApiCommonModel/Model
+use strict;
+use warnings;
+my @tables = (
+'CompoundAttributes',
+'CompoundId',
+'CompoundProperties',
+'CompoundTypeAheads',
+'OntologyLevels',
+'PathwayAttributes',
+'PathwayCompounds',
+'PathwayNodes',
+'PathwayReactions',
+'ChIPchipTranscript',
+'ChrCopyNumbers',
+'CodingSequence',
+'EqtlSpan',
+'EstAlignmentGeneSummary',
+'EstAttributes',
+'EstSequence',
+'GeneAttributes',
+'GeneCopyNumbers',
+'GeneGoTable',
+'GeneGoTerms',
+'GeneId',
+#'GeneIntJuncStats',
+#'GeneIntronJunction',
+'GeneLocations',
+#'GeneMaxIntronGIJ',
+'GeneModelDump',
+'GeneSummaryFilter',
+'GenomicSeqAttributes',
+'GenomicSequenceId',
+'GenomicSequenceSequence',
+'GoTermSummary',
+#'IntronSupportLevel',
+#'IntronUtrCoords',
+'OrganismAbbreviation',
+'OrganismSelectTaxonRank',
+'PANExtDbRls',
+'PANIO',
+'PathwayNodeGene',
+'PathwaysGeneTable',
+'PdbSimilarity',
+'ProteinAttributes',
+'ProteinSequence',
+'RnaSeqStats',
+'SequencePieceClosure',
+'SignalPeptideDomains',
+'Taxonomy',
+'TaxonSpecies',
+'TFBSGene',
+'TranscriptAttributes',
+'TranscriptCenDistance',
+'TranscriptPathway',
+'TranscriptSequence',
+'TransmembraneDomains'
+);
+
+my ($sourceModelDir, $targetModelDir) = @ARGV;
+defined $targetModelDir or die "usage: convert2webready sourceModelDir targetModelDir < fileList\n";
+while (my $filenm = <STDIN>) {
+ chomp $filenm;
+ next if $filenm eq ''; # ignore blank input lines
+ print STDERR "processing $filenm\n";
+ my $filetext = do {
+ local $/ = undef;
+ open my $fh, "<", "$sourceModelDir/$filenm"
+ or die "could not open '$sourceModelDir/$filenm': $!";
+ <$fh>;
+ };
+ # escaped dot and \Q..\E keep the match literal; \b stops a listed name from matching a longer, unlisted table (e.g. OrganismAbbreviationBlast)
+ foreach my $table (@tables) {
+ $filetext =~ s/apidbtuning\.\Q$table\E\b/webready.$table/gi;
+ }
+
+ open(my $out, '>', "$targetModelDir/$filenm") or die "could not open '$targetModelDir/$filenm' for writing: $!";
+ print $out $filetext;
+ close($out) or die "could not close '$targetModelDir/$filenm': $!";
+}
From 2ee6f721b062a8d1cdc29aab109ef2754b2d39a0 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Thu, 5 Jun 2025 19:42:10 -0400
Subject: [PATCH 104/112] add TranscriptProduct
---
.../xml/tuningManager/apiTuningManager.xml | 83 +++++++++++++++++++
1 file changed, 83 insertions(+)
diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml
index 1ce105cbfe..2dd94f6212 100644
--- a/Model/lib/xml/tuningManager/apiTuningManager.xml
+++ b/Model/lib/xml/tuningManager/apiTuningManager.xml
@@ -3,6 +3,89 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Map each GO term that is assigned to at least one gene to a GoSubset term
From 6e5deac4bd84b4f157b841436b348d33cde17b62 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Fri, 6 Jun 2025 10:47:57 -0400
Subject: [PATCH 105/112] restore apiTuningManager.xml
---
.../tuningManager/apiTuningManager-pruned.xml | 3051 ++++++
.../xml/tuningManager/apiTuningManager.xml | 9516 ++++++++++++++---
2 files changed, 11025 insertions(+), 1542 deletions(-)
create mode 100644 Model/lib/xml/tuningManager/apiTuningManager-pruned.xml
diff --git a/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml b/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml
new file mode 100644
index 0000000000..2dd94f6212
--- /dev/null
+++ b/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml
@@ -0,0 +1,3051 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Map each GO term that is assigned to at least one gene to a GoSubset term
+ that is either itself or an immediate ancestor. By "immediate ancestor"
+ we mean an ancestor such that there isn't an intermediate ancestor also
+ in the subset. (Note that there can be multiple links as long as none is
+ in the subset.) This is currently restricted to 'goslim_generic', solely
+ by the condition in the SUBSET_TERM subquery.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each row maps a dataset onto an ID for which the dataset contains data;
+ each dataset gets one such row.
+ Used in dataset record queries.
+
+
+
+
+
+
+
+
+
+
+
+ Stores per-organism information. Used by the organism record, as well
+ as by project_id(), the function that maps an organism to a project.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0 then 1 else 0 end as hasCentromere
+ FROM DOTS.MISCELLANEOUS f
+ , sres.ontologyTerm ot
+ , dots.nasequence s
+ WHERE ot.ontology_term_id = f.sequence_ontology_id
+ AND ot.name='centromere'
+ AND f.na_sequence_id = s.na_sequence_id
+ GROUP BY s.taxon_id
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+ = g.start_min
+ AND g.na_sequence_id = seq.na_sequence_id
+ AND t.name = 'ExternalNASequence'
+ ) gene
+ ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id)
+ GROUP BY sim.taxon_id
+ ]]>
+
+
+ = 100
+ AND s.is_best_alignment in (1)
+ AND s.percent_est_bases_aligned >= 20
+ AND s.percent_identity >= 90
+ AND e.best_alignment_count <= 1
+ AND e.source_id = s.accession
+ GROUP by s.gene HAVING count(*) >= 1
+ ) est ON ga.source_id = est.source_id
+ RIGHT OUTER JOIN (
+ SELECT project_id, taxon_id,
+ max(database_version) as database_version,
+ CASE WHEN ncbi_tax_id > 9000000000 THEN NULL
+ ELSE ncbi_tax_id
+ END ncbi_tax_id,
+ to_char(sum(length)/1000000,'9999.99') as megabps
+ FROM GenomicSeqAttributes
+ WHERE is_top_level = 1
+ GROUP BY project_ID, taxon_id, ncbi_tax_id
+ ) genomestat ON genomestat.taxon_id = ga.taxon_id
+ LEFT OUTER JOIN (
+ SELECT count(distinct ga.source_id) as ct, ga.taxon_id
+ FROM GeneAttributes ga, SnpAttributes sf
+ WHERE sf.gene_source_id = ga.source_id
+ AND ga.is_deprecated = 0
+ GROUP BY ga.taxon_id
+ ) snpCount ON ga.taxon_id = snpCount.taxon_id
+ GROUP BY genomestat.taxon_id,
+ genomestat.project_id,
+ genomestat.database_version,
+ genomestat.ncbi_tax_id,
+ genomestat.Megabps,
+ snpCount.ct
+ ]]>
+
+
+ 10000000
+ -- then 'TMPTX_' || round(t.ncbi_tax_id / 10000000) || '_' ||
+ -- mod(t.ncbi_tax_id, 10000000) -- e.g. "TMPTX_930_1"
+ -- then 'TMPTX_' || t.ncbi_tax_id -- all the many digits
+ then 'TMPTX_' || o.public_abbrev
+ else 'NCBITAXON_' || t.ncbi_tax_id
+ end as source_id,
+ o.abbrev as internal_abbrev,
+ o.public_abbrev,
+ o.orthomcl_abbrev,
+ o.family_name_for_files,
+ tn.name as organism_name,
+ o.genome_source,
+ o.strain_abbrev,
+ o.is_annotated_genome,
+ o.is_reference_strain,
+ o.is_family_representative,
+ o.name_for_filenames,
+ o.taxon_id as component_taxon_id,
+ gc.database_version,
+ gc.megabps as megabps,
+ gc.ncbi_tax_id as ncbi_tax_id,
+ gc.snpCount as snpCount,
+ gc.geneCount as geneCount,
+ gc.pseudoGeneCount as pseudoGeneCount,
+ gc.codingGeneCount as codingGeneCount,
+ gc.otherGeneCount as otherGeneCount,
+ gc.ChipChipGeneCount as ChipChipGeneCount,
+ gc.orthologCount as orthologCount,
+ gc.goCount as goCount,
+ gc.tfbsCount as tfbsCount,
+ gc.proteomicsCount as proteomicsCount,
+ gc.estCount as estCount,
+ gc.ecNumberCount as ecNumberCount,
+ cast(coalesce(dsc.Organellar_Has, 0) as NUMERIC(1)) as isOrganellar,
+ cast(coalesce(dsc.HTSIsolate_Has, 0) as NUMERIC(1)) as hasHTSIsolate,
+ cast(coalesce(dsc.Popset_Has, 0) as NUMERIC(1)) as hasPopset,
+ cast(coalesce(dsc.Epitope_Has, 0) as NUMERIC(1)) as hasEpitope,
+ cast(coalesce(dsc.Array_Has, 0) as NUMERIC(1)) as hasArray,
+ coalesce(oc.hasCentromere, 0) as hasCentromere,
+ coalesce(sc.contig_num, 0) as contigCount,
+ coalesce(sc.supercont_num, 0) as supercontigCount,
+ coalesce(sc.chrom_num, 0) as chromosomeCount,
+ coalesce(cc.communityCount, 0) as communityCount,
+ coalesce(psc.popsetCount, 0) as popsetCount,
+ coalesce(pc.geneArrayCount, 0) as arrayGeneCount,
+ coalesce(pc.rnaSeqCount, 0) as rnaSeqCount,
+ coalesce(pc.rtPCRCount, 0) as rtPCRCount,
+ coalesce(ta.avg_transcript_length, 0) as avg_transcript_length
+ FROM apidb.Organism o
+ INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id
+ INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id
+ LEFT JOIN DataSourceCount dsc ON o.taxon_id = dsc.taxon_id
+ LEFT JOIN OrganismCentromere oc ON o.taxon_id = oc.taxon_id
+ LEFT JOIN SequenceCount sc ON o.taxon_id = sc.taxon_id
+ LEFT JOIN CommunityCount cc ON o.taxon_id = cc.taxon_id
+ LEFT JOIN GeneCount gc ON o.taxon_id = gc.taxon_id
+ LEFT JOIN popsetCount psc ON o.taxon_id = psc.taxon_id
+ LEFT JOIN profileCount pc ON o.taxon_id = pc.taxon_id
+ LEFT JOIN (
+ SELECT taxon_id, round(avg(length),1) as avg_transcript_length
+ FROM TranscriptAttributes
+ GROUP by taxon_id
+ ) ta ON o.taxon_id = ta.taxon_id
+ WHERE tn.name_class = 'scientific name'
+ ) oa,
+ TaxonSpecies ts,
+ sres.taxon t,
+ sres.taxonname tn2
+ WHERE oa.component_taxon_id = ts.taxon_id
+ AND ts.species_taxon_id = t.taxon_id
+ AND ts.species_taxon_id = tn2.taxon_id
+ AND tn2.name_class = 'scientific name'
+ ]]>
+
+
+
+
+
+
+
+
+
+ Stores, for each transcript, a string containing the gene-relative coordinates
+ of all its introns and UTRs.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Stores special webservice abbreviations which are not standard organism
+ names. Each record maps an organism name onto this abbreviation, as
+ well as the species name and project ID. Used by the model and as an
+ input in the creation of the OrganismAbbreviationBlast tuning table.
+ Propagated to portal instances.
+
+
+
+
+
+
+
+
+
+
+
+
+ Group species by higher level taxonomy. Each row associates a taxon of
+ interest with one of its ancestors in the taxon tree. Used in parameter
+ queries that have to know about the taxon tree. Propagated to portal
+ instances.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record maps an organism to its BLAST abbreviation. Used by
+ BLAST-query parameters. Propagated to portal instances.
+
+
+
+
+
+
+
+
+
+
+
+
+ For each project, show which BLAST databases are available for which
+ species. Used in BLAST param queries. Propagated to portal instances.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each row stores mass-spec. based expression evidence for one sample of
+ one experiment for one gene. Used for mass spec queries in the model,
+ GBrowse, and PBrowse, and also in the creation of the MSTranscriptSummary
+ tuning table.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Stores summary information from annotated genomes to facilitate overview section of gene page
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Mass-spec experiment results for a peptide. Used by the model, GBrowse,
+ and PBrowse.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Data from the Seattle Structural Genomics Center for Infectious Disease,
+ populated from their web service. Used in the gene record.
+
+
+
+
+
+
+
+ Used by the model and GBrowse, as well as an input in the creation of
+ the tuning tables like MSModifiedPeptideSummary and MSPeptideSummary.
+
+
+
+
+
+
+
+ Used by the model when writing profile data
+
+
+
+
+
+
+
+ Associates an organism with the GBrowse and PBrowse tracks available
+ for it. Used by the gene record.
+
+
+
+
+
+
+
+
+
+ Each row maps a dataset onto an ID for which the dataset contains data;
+ each dataset gets one such row.
+ Used in dataset record queries.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Citation info for proteomics datasets, used by GBrowse
+
+
+
+ ' || sample || '' as sample_i
+ FROM MSPeptideSummary mps, DatasetPresenter ds
+ -- consider using the tuning table ExternalDbDatasetPresenter instead of the LIKE below, if its performance is a problem
+ WHERE (ds.name = mps.external_database_name or mps.external_database_name like ds.dataset_name_pattern)
+ ) t
+ group by name, id
+ )
+ SELECT name,
+ substr(description, 1, 4000) || ' Primary Contact Email: '|| coalesce(email, 'unavailable') -- first 4000 characters of the summary; substr takes (string, start, length)
+ || ' PMID: ' || publications || 'Samples:
'
+ || sample_table || chr(10) ||
+ ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation
+ FROM (
+ SELECT ds.name as name, ds.summary as description, pubs.contact_email as email,
+ pubs.pmids as publications, samples.sample_table as sample_table
+ FROM DatasetPresenter ds, pubs, samples
+ WHERE ds.dataset_presenter_id = pubs.id
+ AND ds.dataset_presenter_id = samples.id
+ ) t
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ = commit_after THEN
+ COMMIT;
+ ctrows := 0;
+ END IF;
+ END LOOP;
+ commit;
+ END;
+ $$ LANGUAGE PLPGSQL;
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ For all datasets, list all genes (source_id) of a gene_group where one of them (profile_graph_id)
+ has data for a profile_set.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ for gene-page expression graphs
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Data from STRING-DB.org, populated from their web service.
+ Used in the gene record.
+
+
+
+
+
+
+
+
+ annotation updates from Apollo
+
+
+
+
+
+
+
+
+
+
+ Stores a mapping between external databases, taxon IDs, and URLs
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Text from ApolloUpdate that can be used in site search to find genes
+
+
+
+
+ = au.mapping_start
+ AND ta.source_id = au.apolloTranscript
+ AND ga.strand_plus_minus = au.strand
+ AND ta.gene_source_id = ga.source_id
+ ]]>
+
+
+
+
+
+
+
+
+
+ Text for PreferredProduct table on gene record page.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ all products for each gene
+
+
+
+
+
+
+
+
+
+
+
+
+ = 3 -- no product in apidb.GeneFeatureProduct
+ union
+ select ta.gene_source_id AS source_id, t.product, ta.project_id,
+ null as is_alternate,
+ string_agg(ta.source_id, ', ' order by ta.source_id) as transcript_ids,
+ null as reference, null as evidence_code, null as evidence_code_parameter,
+ null as assigned_by, 'dots.Transcript' as source
+ from TranscriptAttributes ta, dots.Transcript t
+ where ta.na_feature_id = t.na_feature_id
+ and t.product is not null
+ and ta.gene_source_id
+ not in (select source_id
+ from GeneProduct
+ where source_rule < 6) -- product only in dots.Transcript
+ group by ta.gene_source_id, ta.project_id, t.product
+ union
+ select gp.source_id as source_id, gp.product, ga.project_id,
+ null as is_alternate,
+ null as transcript_ids, null as reference, null as evidence_code,
+ null as evidence_code_parameter, null as assigned_by,
+ 'unspecified product' as source
+ from GeneProduct gp, GeneAttributes ga
+ where gp.source_rule = 7
+ and gp.source_id = ga.source_id
+ order by is_alternate desc, transcript_ids
+ ]]>
+
+
+
+
+
+
+
+
diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml
index 2dd94f6212..8ea8b951bc 100644
--- a/Model/lib/xml/tuningManager/apiTuningManager.xml
+++ b/Model/lib/xml/tuningManager/apiTuningManager.xml
@@ -3,1425 +3,7009 @@
-
-
-
-
-
+
+ Locations and Sequence of Transmembrane Domains (TMHMM)
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
- Map each GO term that is assigned to at least one gene to a GoSubset term
- that is either itself or an immediate ancestor. By "immediate ancestor"
- we mean an ancestor such that there isn't an intermediate ancestor also
- in the subset. (Note that there can be multiple links as long as none is
- in the subset.) This is currently restricted to 'goslim_generic', solely
- by the condition in the SUBSET_TERM subquery.
+
+ Locations and Sequence of Signal Peptide Domains (SignalP)
-
-
-
+
+
+
+
+
+
+
+ CREATE TABLE &prefixSignalPeptideDomains&1 AS
+ SELECT
+ gf.source_id gene_source_id
+ , t.source_id transcript_source_id
+ , taf.na_feature_id
+ , spf.aa_feature_id
+ , spf.aa_sequence_id
+ , spf.parent_id
+ , aal.start_min
+ , aal.end_max
+ , spf.algorithm_name
+ , substr(s.sequence, 1, aal.end_max::INTEGER) peptide_sequence -- residues 1..end_max; substr takes (string, start, length)
+ FROM
+ dots.SignalPeptideFeature spf
+ , dots.AaLocation aal
+ , dots.TranslatedAaFeature taf
+ , dots.TranslatedAaSequence tas
+ , dots.GeneFeature gf
+ , dots.AaSequence s
+ , dots.Transcript t
+ WHERE
+ spf.aa_sequence_id = s.aa_sequence_id
+ AND aal.aa_feature_id = spf.aa_feature_id
+ AND t.na_feature_id = taf.na_feature_id
+ AND taf.aa_sequence_id = tas.aa_sequence_id
+ AND tas.aa_sequence_id = spf.aa_sequence_id
+ AND gf.na_feature_id = t.parent_id
+ AND (spf.signal_probability >= .5 -- a feature is kept when any one of these four tests passes
+ OR spf.signal_probability IS NULL
+ OR ((spf.means_score + spf.maxy_score) / 2) >= .5
+ OR ( spf.maxy_conclusion + spf.maxc_conclusion + spf.maxs_conclusion + spf.means_conclusion ) >= 3
+ )
+ ORDER BY
+ spf.aa_sequence_id, spf.aa_feature_id
+ ]]>
+ ]]>
-
+
+
+
+
-
- Each row maps a dataset onto an ID for which the dataset contains data;
- each dataset gets one such row.
- Used in dataset record queries.
+
+ Taxon ranks for organisms
-
-
-
+
+
+
-
- Stores per-organism information. Used by the organism record, as well
- as by project_id(), the function that maps an organism to a project.
+
+ Attributes for Metabolic Pathways
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
-
+ p.source_id
+ , p.pathway_id
+ , p.name
+ , enz.total_enzyme_count
+ , cpd.total_compound_count
+ , p.url
+ , replace(replace(ed.name, 'Pathways_', ''), '_RSRC', '') as pathway_source
+ , ed.name as external_db_name
+ , edr.version as external_db_version
+ FROM
+ sres.pathway p
+ , sres.externalDatabase ed
+ , sres.externalDatabaseRelease edr
+ ,(SELECT
+ COUNT( *) AS total_compound_count
+ , pathway_id
+ FROM
+ sres.pathwayNode pn
+ , SRES.ontologyterm ot
+ WHERE
+ pn.pathway_node_type_id = ot.ontology_term_id
+ AND ot.name = 'molecular entity'
+ GROUP BY
+ pathway_id
+ ) cpd
+ ,(SELECT
+ COUNT( *) AS total_enzyme_count
+ , pathway_id
+ FROM
+ sres.pathwayNode pn
+ , SRES.ontologyterm ot
+ WHERE
+ pn.pathway_node_type_id = ot.ontology_term_id
+ AND ot.name = 'enzyme'
+ GROUP BY
+ pathway_id
+ ) enz
+ WHERE
+ ed.external_database_id = edr.external_database_id
+ AND edr.external_database_release_id = p.external_database_release_id
+ AND cpd.pathway_id = p.pathway_id
+ AND enz.pathway_id = p.pathway_id
+ AND source_id NOT IN('ec01100', 'ec01110', 'ec01120')
+ -- temporarily remove MPMP from release 46
+ AND ed.name NOT LIKE '%MPMP%'
+ ]]>
+
0 then 1 else 0 end as hasCentromere
- FROM DOTS.MISCELLANEOUS f
- , sres.ontologyTerm ot
- , dots.nasequence s
- WHERE ot.ontology_term_id = f.sequence_ontology_id
- AND ot.name='centromere'
- AND f.na_sequence_id = s.na_sequence_id
- GROUP BY s.taxon_id
+ CREATE UNIQUE INDEX PathAttr_sourceId_pwaySrc&1
+ ON &prefixPathwayAttributes&1 (source_id, pathway_source)
+
]]>
+
+
+
+
+ synteny stats for each reference-taxon / comparison-taxon pair
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
= 100
- AND s.is_best_alignment in (1)
- AND s.percent_est_bases_aligned >= 20
- AND s.percent_identity >= 90
- AND e.best_alignment_count <= 1
- AND e.source_id = s.accession
- GROUP by s.gene HAVING count(*) >= 1
- ) est ON ga.source_id = est.source_id
- RIGHT OUTER JOIN (
- SELECT project_id, taxon_id,
- max(database_version) as database_version,
- CASE WHEN ncbi_tax_id > 9000000000 THEN NULL
- ELSE ncbi_tax_id
- END ncbi_tax_id,
- to_char(sum(length)/1000000,'9999.99') as megabps
- FROM GenomicSeqAttributes
- WHERE is_top_level = 1
- GROUP BY project_ID, taxon_id, ncbi_tax_id
- ) genomestat ON genomestat.taxon_id = ga.taxon_id
- LEFT OUTER JOIN (
- SELECT count(distinct ga.source_id) as ct, ga.taxon_id
- FROM GeneAttributes ga, SnpAttributes sf
- WHERE sf.gene_source_id = ga.source_id
- AND ga.is_deprecated = 0
- GROUP BY ga.taxon_id
- ) snpCount ON ga.taxon_id = snpCount.taxon_id
- GROUP BY genomestat.taxon_id,
- genomestat.project_id,
- genomestat.database_version,
- genomestat.ncbi_tax_id,
- genomestat.Megabps,
- snpCount.ct
+ DO $$
+ DECLARE
+ idlist RECORD; -- loop row holding one distinct organism value
+ BEGIN
+ FOR idlist IN ( SELECT DISTINCT organism FROM GeneAttributes ) -- fill TranscriptPathway one organism at a time
+ LOOP
+ INSERT INTO TranscriptPathway&1
+ WITH transcript_ec AS ( -- enzyme classes that appear in dots.AaSequenceEnzymeClass
+ SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
+ regexp_count( ec.ec_number, '-') as wildcard_count -- number of '-' characters in the EC number
+ FROM sres.EnzymeClass ec
+ WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM dots.AaSequenceEnzymeClass)
+ ),
+ pathway_node_ec AS ( -- distinct (pathway_id, row_id) pairs from pathway nodes typed 'enzyme'
+ SELECT distinct pn.pathway_id, pn.row_id as enzyme_class_id -- row_id is exposed under the name enzyme_class_id
+ FROM sres.PathwayNode pn, sres.ontologyterm ot
+ WHERE pn.pathway_node_type_id = ot.ontology_term_id
+ AND ot.name = 'enzyme'
+ AND pn.display_label != '-.-.-.-' -- skip nodes labelled '-.-.-.-'
+ ),
+ pathway_ec AS ( -- enzyme classes referenced by pathway_node_ec
+ SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
+ regexp_count( ec.ec_number, '-') as wildcard_count -- number of '-' characters in the EC number
+ FROM sres.EnzymeClass ec
+ WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM pathway_node_ec)
+ GROUP BY ec.enzyme_class_id
+ ),
+ ec_match AS ( -- pairs transcript and pathway EC numbers level by level; a NULL level on either side matches anything
+ SELECT tec.enzyme_class_id as transcript_enzyme_class_id,
+ pec.enzyme_class_id as pathway_enzyme_class_id,
+ tec.wildcard_count as wildcard_count_transcript,
+ pec.wildcard_count as wildcard_count_pathway,
+ tec.ec_number as ec_number_transcript,
+ pec.ec_number as ec_number_pathway
+ FROM transcript_ec tec, pathway_ec pec
+ WHERE (tec.ec_number_1 = pec.ec_number_1 or tec.ec_number_1 is null or pec.ec_number_1 is null)
+ AND (tec.ec_number_2 = pec.ec_number_2 or tec.ec_number_2 is null or pec.ec_number_2 is null)
+ AND (tec.ec_number_3 = pec.ec_number_3 or tec.ec_number_3 is null or pec.ec_number_3 is null)
+ AND (tec.ec_number_4 = pec.ec_number_4 or tec.ec_number_4 is null or pec.ec_number_4 is null)
+ )
+ SELECT DISTINCT ga.source_id -- ga is TranscriptAttributes, so this is the transcript's source_id
+ , ga.gene_source_id
+ , ga.project_id
+ , pa.source_id as pathway_source_id
+ , pa.name as pathway_name
+ , ec_match.ec_number_transcript as ec_number_gene
+ , ec_match.wildcard_count_transcript as wildcard_count_gene
+ , ec_match.ec_number_pathway
+ , ec_match.wildcard_count_pathway
+ , CASE WHEN ec_match.ec_number_pathway = ec_match.ec_number_transcript -- 1 when the two EC strings are identical
+ THEN 1
+ ELSE 0 END as exact_match
+ , CASE WHEN ec_match.wildcard_count_pathway + ec_match.wildcard_count_transcript = 0 -- 1 when neither EC number contains '-'
+ THEN 1
+ ELSE 0 END as complete_ec
+ , pa.pathway_id
+ , pa.pathway_source
+ , p.external_database_release_id
+ FROM PathwayAttributes pa
+ , sres.pathway p
+ , pathway_node_ec pec
+ , ec_match
+ , dots.AaSequenceEnzymeClass asec
+ , TranscriptAttributes ga
+ WHERE ga.organism = idlist.organism -- restrict this pass to the current organism
+ AND pa.pathway_id = pec.pathway_id
+ AND p.pathway_id = pa.pathway_id
+ AND pec.enzyme_class_id = ec_match.pathway_enzyme_class_id
+ AND asec.enzyme_class_id = ec_match.transcript_enzyme_class_id
+ AND ga.aa_sequence_id = asec.aa_sequence_id
+ AND ( -- rows with a NULL orthomcl_name only use EC assignments whose evidence_code is not 'OrthoMCLDerived'
+ (ga.orthomcl_name IS NULL AND asec.evidence_code != 'OrthoMCLDerived')
+ OR ga.orthomcl_name IS NOT NULL
+ )
+ ;
+ commit; -- commit after each organism's insert
+ END LOOP;
+ END;
+ $$ LANGUAGE PLPGSQL;
]]>
10000000
- -- then 'TMPTX_' || round(t.ncbi_tax_id / 10000000) || '_' ||
- -- mod(t.ncbi_tax_id, 10000000) -- e.g. "TMPTX_930_1"
- -- then 'TMPTX_' || t.ncbi_tax_id -- all the many digits
- then 'TMPTX_' || o.public_abbrev
- else 'NCBITAXON_' || t.ncbi_tax_id
- end as source_id,
- o.abbrev as internal_abbrev,
- o.public_abbrev,
- o.orthomcl_abbrev,
- o.family_name_for_files,
- tn.name as organism_name,
- o.genome_source,
- o.strain_abbrev,
- o.is_annotated_genome,
- o.is_reference_strain,
- o.is_family_representative,
- o.name_for_filenames,
- o.taxon_id as component_taxon_id,
- gc.database_version,
- gc.megabps as megabps,
- gc.ncbi_tax_id as ncbi_tax_id,
- gc.snpCount as snpCount,
- gc.geneCount as geneCount,
- gc.pseudoGeneCount as pseudoGeneCount,
- gc.codingGeneCount as codingGeneCount,
- gc.otherGeneCount as otherGeneCount,
- gc.ChipChipGeneCount as ChipChipGeneCount,
- gc.orthologCount as orthologCount,
- gc.goCount as goCount,
- gc.tfbsCount as tfbsCount,
- gc.proteomicsCount as proteomicsCount,
- gc.estCount as estCount,
- gc.ecNumberCount as ecNumberCount,
- cast(coalesce(dsc.Organellar_Has, 0) as NUMERIC(1)) as isOrganellar,
- cast(coalesce(dsc.HTSIsolate_Has, 0) as NUMERIC(1)) as hasHTSIsolate,
- cast(coalesce(dsc.Popset_Has, 0) as NUMERIC(1)) as hasPopset,
- cast(coalesce(dsc.Epitope_Has, 0) as NUMERIC(1)) as hasEpitope,
- cast(coalesce(dsc.Array_Has, 0) as NUMERIC(1)) as hasArray,
- coalesce(oc.hasCentromere, 0) as hasCentromere,
- coalesce(sc.contig_num, 0) as contigCount,
- coalesce(sc.supercont_num, 0) as supercontigCount,
- coalesce(sc.chrom_num, 0) as chromosomeCount,
- coalesce(cc.communityCount, 0) as communityCount,
- coalesce(psc.popsetCount, 0) as popsetCount,
- coalesce(pc.geneArrayCount, 0) as arrayGeneCount,
- coalesce(pc.rnaSeqCount, 0) as rnaSeqCount,
- coalesce(pc.rtPCRCount, 0) as rtPCRCount,
- coalesce(ta.avg_transcript_length, 0) as avg_transcript_length
- FROM apidb.Organism o
- INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id
- INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id
- LEFT JOIN DataSourceCount dsc ON o.taxon_id = dsc.taxon_id
- LEFT JOIN OrganismCentromere oc ON o.taxon_id = oc.taxon_id
- LEFT JOIN SequenceCount sc ON o.taxon_id = sc.taxon_id
- LEFT JOIN CommunityCount cc ON o.taxon_id = cc.taxon_id
- LEFT JOIN GeneCount gc ON o.taxon_id = gc.taxon_id
- LEFT JOIN popsetCount psc ON o.taxon_id = psc.taxon_id
- LEFT JOIN profileCount pc ON o.taxon_id = pc.taxon_id
- LEFT JOIN (
- SELECT taxon_id, round(avg(length),1) as avg_transcript_length
- FROM TranscriptAttributes
- GROUP by taxon_id
- ) ta ON o.taxon_id = ta.taxon_id
- WHERE tn.name_class = 'scientific name'
- ) oa,
- TaxonSpecies ts,
- sres.taxon t,
- sres.taxonname tn2
- WHERE oa.component_taxon_id = ts.taxon_id
- AND ts.species_taxon_id = t.taxon_id
- AND ts.species_taxon_id = tn2.taxon_id
- AND tn2.name_class = 'scientific name'
+ create index TranscriptPath_ix&1
+ on TranscriptPathway&1(gene_source_id, source_id, pathway_source_id,
+ pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway,
+ ec_number_pathway, pathway_source)
+
]]>
-
-
-
- Stores, for each transcript, a string containing the gene-relative coordinates
- of all its introns and UTRs.
-
-
-
+
+
+
+
-
+ ]]>
-
-
-
- Stores special webservice abbreviations which are not standard organism
- names. Each record maps an organism name onto this abbreviation, as
- well as the species name and project ID. Used by the model and as an
- input in the creation of the OrganismAbbreviationBlast tuning table.
- Propagated to portal instances.
-
-
-
+
+
+
+
+
+
+
+
+
-
-
-
-
- Group species by higher level taxonomy. Each row associates a taxon of
- interest with one of its ancestors in the taxon tree. Used in parameter
- queries that have to know about the taxon tree. Propagated to portal
- instances.
-
-
-
-
-
+
+ the max and min depth of each ontology term in OntologyRelationship. Used by the GoTermSummary tuning table
+
+
+
+
+
+
+
+
+ GeneGoTerms: each row represents one GO term assignment to one gene, right from what was loaded.
+
+
+
+
+
+
+
+
+
+
+
-
- Each record maps an organism to its BLAST abbreviation. Used by
- BLAST-query parameters. Propagated to portal instances.
+
+ A tuning table for the gene record GO term table
-
-
-
-
+
+
+
+
+
-
- For each project, show which BLAST databases are available for which
- species. Used in BLAST param queries. Propagated to portal instances.
+
+
+ Map each GO term that is assigned to at least one gene to a GoSubset term
+ that is either itself or an immediate ancestor. By "immediate ancestor"
+ we mean an ancestor such that there isn't an intermediate ancestor also
+ in the subset. (Note that there can be multiple links as long as none is
+ in the subset.) This is currently restricted to 'goslim_generic', solely
+ by the condition in the SUBSET_TERM subquery.
-
-
-
-
-
-
-
+
+
+
+
+
+
-
- Each row stores mass-spec. based expression evidence for one sample of
- one experiment for one gene. Used for mass spec queries in the model,
- GBrowse, and PBrowse, and also in the creation of the MSTranscriptSummary
- tuning table.
+
+
+ GoTermSummary: each row represents one GO term assignment to one gene.
+ (Typically, a gene has multiple such assignments.) This is used for
+ finding gene-GO mappings, such as for the gene-page GO table.
-
-
-
-
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
- Stores summary information from annotated genomes to facilitate overview section of gene page
+
+ Each row maps a dataset onto an ID for which the dataset contains data;
+ each dataset gets one such row.
+ Used in dataset record queries.
+
-
-
-
-
+
-
- Mass-spec experiment results for a peptide. Used by the model, GBrowse,
- and PBrowse.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Stores per-organism information. Used by the organism record, as well
+ as by project_id(), the function that maps an organism to a project.
+
+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0 then 1 else 0 end as hasCentromere
+ FROM DOTS.MISCELLANEOUS f
+ , sres.ontologyTerm ot
+ , dots.nasequence s
+ WHERE ot.ontology_term_id = f.sequence_ontology_id
+ AND ot.name='centromere'
+ AND f.na_sequence_id = s.na_sequence_id
+ GROUP BY s.taxon_id
]]>
-
-
-
-
-
- Data from the Seattle Structural Genomics Center for Infectious Disease,
- populated from their web service. Used in the gene record.
-
-
-
-
-
-
-
- Used by the model and GBrowse, as well as an input in the creation of
- the tuning tables like MSModifiedPeptideSummary and MSPeptideSummary.
-
-
-
-
+
+ = g.start_min
+ AND g.na_sequence_id = seq.na_sequence_id
+ AND t.name = 'ExternalNASequence'
+ ) gene
+ ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id)
+ GROUP BY sim.taxon_id
+ ]]>
+
+
+ = 100
+ AND s.is_best_alignment in (1)
+ AND s.percent_est_bases_aligned >= 20
+ AND s.percent_identity >= 90
+ AND e.best_alignment_count <= 1
+ AND e.source_id = s.accession
+ GROUP by s.gene HAVING count(*) >= 1
+ ) est ON ga.source_id = est.source_id
+ RIGHT OUTER JOIN (
+ SELECT project_id, taxon_id,
+ max(database_version) as database_version,
+ CASE WHEN ncbi_tax_id > 9000000000 THEN NULL
+ ELSE ncbi_tax_id
+ END ncbi_tax_id,
+ to_char(sum(length)/1000000,'9999.99') as megabps
+ FROM GenomicSeqAttributes
+ WHERE is_top_level = 1
+ GROUP BY project_ID, taxon_id, ncbi_tax_id
+ ) genomestat ON genomestat.taxon_id = ga.taxon_id
+ LEFT OUTER JOIN (
+ SELECT count(distinct ga.source_id) as ct, ga.taxon_id
+ FROM GeneAttributes ga, SnpAttributes sf
+ WHERE sf.gene_source_id = ga.source_id
+ AND ga.is_deprecated = 0
+ GROUP BY ga.taxon_id
+ ) snpCount ON ga.taxon_id = snpCount.taxon_id
+ GROUP BY genomestat.taxon_id,
+ genomestat.project_id,
+ genomestat.database_version,
+ genomestat.ncbi_tax_id,
+ genomestat.Megabps,
+ snpCount.ct
+ ]]>
+
+
+ 10000000
+ -- then 'TMPTX_' || round(t.ncbi_tax_id / 10000000) || '_' ||
+ -- mod(t.ncbi_tax_id, 10000000) -- e.g. "TMPTX_930_1"
+ -- then 'TMPTX_' || t.ncbi_tax_id -- all the many digits
+ then 'TMPTX_' || o.public_abbrev
+ else 'NCBITAXON_' || t.ncbi_tax_id
+ end as source_id,
+ o.abbrev as internal_abbrev,
+ o.public_abbrev,
+ o.orthomcl_abbrev,
+ o.family_name_for_files,
+ tn.name as organism_name,
+ o.genome_source,
+ o.strain_abbrev,
+ o.is_annotated_genome,
+ o.is_reference_strain,
+ o.is_family_representative,
+ o.name_for_filenames,
+ o.taxon_id as component_taxon_id,
+ gc.database_version,
+ gc.megabps as megabps,
+ gc.ncbi_tax_id as ncbi_tax_id,
+ gc.snpCount as snpCount,
+ gc.geneCount as geneCount,
+ gc.pseudoGeneCount as pseudoGeneCount,
+ gc.codingGeneCount as codingGeneCount,
+ gc.otherGeneCount as otherGeneCount,
+ gc.ChipChipGeneCount as ChipChipGeneCount,
+ gc.orthologCount as orthologCount,
+ gc.goCount as goCount,
+ gc.tfbsCount as tfbsCount,
+ gc.proteomicsCount as proteomicsCount,
+ gc.estCount as estCount,
+ gc.ecNumberCount as ecNumberCount,
+ cast(coalesce(dsc.Organellar_Has, 0) as NUMERIC(1)) as isOrganellar,
+ cast(coalesce(dsc.HTSIsolate_Has, 0) as NUMERIC(1)) as hasHTSIsolate,
+ cast(coalesce(dsc.Popset_Has, 0) as NUMERIC(1)) as hasPopset,
+ cast(coalesce(dsc.Epitope_Has, 0) as NUMERIC(1)) as hasEpitope,
+ cast(coalesce(dsc.Array_Has, 0) as NUMERIC(1)) as hasArray,
+ coalesce(oc.hasCentromere, 0) as hasCentromere,
+ coalesce(sc.contig_num, 0) as contigCount,
+ coalesce(sc.supercont_num, 0) as supercontigCount,
+ coalesce(sc.chrom_num, 0) as chromosomeCount,
+ coalesce(cc.communityCount, 0) as communityCount,
+ coalesce(psc.popsetCount, 0) as popsetCount,
+ coalesce(pc.geneArrayCount, 0) as arrayGeneCount,
+ coalesce(pc.rnaSeqCount, 0) as rnaSeqCount,
+ coalesce(pc.rtPCRCount, 0) as rtPCRCount,
+ coalesce(ta.avg_transcript_length, 0) as avg_transcript_length
+ FROM apidb.Organism o
+ INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id
+ INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id
+ LEFT JOIN DataSourceCount dsc ON o.taxon_id = dsc.taxon_id
+ LEFT JOIN OrganismCentromere oc ON o.taxon_id = oc.taxon_id
+ LEFT JOIN SequenceCount sc ON o.taxon_id = sc.taxon_id
+ LEFT JOIN CommunityCount cc ON o.taxon_id = cc.taxon_id
+ LEFT JOIN GeneCount gc ON o.taxon_id = gc.taxon_id
+ LEFT JOIN popsetCount psc ON o.taxon_id = psc.taxon_id
+ LEFT JOIN profileCount pc ON o.taxon_id = pc.taxon_id
+ LEFT JOIN (
+ SELECT taxon_id, round(avg(length),1) as avg_transcript_length
+ FROM TranscriptAttributes
+ GROUP by taxon_id
+ ) ta ON o.taxon_id = ta.taxon_id
+ WHERE tn.name_class = 'scientific name'
+ ) oa,
+ TaxonSpecies ts,
+ sres.taxon t,
+ sres.taxonname tn2
+ WHERE oa.component_taxon_id = ts.taxon_id
+ AND ts.species_taxon_id = t.taxon_id
+ AND ts.species_taxon_id = tn2.taxon_id
+ AND tn2.name_class = 'scientific name'
+ ]]>
+
+
+ -->
-
- Used by the model when writing profile data
-
-
+ ]]>
+
-
-
- Associates an organism with the GBrowse and PBrowse tracks available
- for it. Used by the gene record.
+
+ Each record maps a gene to a PDB structure. Used by the model to find
+ genes that have a PDB structure and to find the PDB structures for a
+ given gene.
-
-
-
+
+
+
+
+
+
+
+
+ GeneId maps any valid ID for a gene onto its official ID. These two quantities
+ are stored in the "id" and "gene" columns, respectively. The "unique_mapping"
+ column is set to 1 for IDs which map to only one gene.
-
- Each row maps a dataset onto an ID for which the dataset contains data;
- each dataset gets one such row.
- Used in dataset record queries.
+ Most of the CREATE TABLE statement is made up of the union of nine subqueries,
+ each of which looks in a different place for gene IDs. Each subquery populates
+ the "union_member" field with a different literal string, to make it easier to
+ understand which part (or parts) of the SQL is responsible for each ID-to-gene
+ mapping.
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
= pred_loc.start_min
+ AND pred_loc.is_reversed = gene_loc.is_reversed
+ AND pred_loc.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ UNION
+ SELECT ng.name AS id, gf.source_id AS gene,
+ 'NaGene' as union_member, ed.name as database_name /* dots.NaGene.name */
+ FROM dots.GeneFeature gf, dots.NaFeatureNaGene nfng, dots.NaGene ng,
+ sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+ WHERE gf.na_feature_id = nfng.na_feature_id
+ AND ng.na_gene_id = nfng.na_gene_id
+ AND gf.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ UNION
+ SELECT source_id AS id, source_id AS gene,
+ 'same ID' as union_member, ed.name as database_name /* same ID (reflexive mapping) */
+ FROM dots.GeneFeature gf,
+ sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+ WHERE gf.external_database_release_id = edr.external_database_release_id
+ AND edr.external_database_id = ed.external_database_id
+ UNION
+ SELECT n.name AS id, gf.source_id AS gene,
+ 'gene name' as union_member, d.name as database_name -- apidb.GeneFeatureName.name
+ from dots.genefeature gf, sres.ExternalDatabaseRelease r, sres.ExternalDatabase d,
+ ( select na_feature_id, name
+ from apidb.GeneFeatureName
+ where is_preferred = 1
+ EXCEPT
+ -- suppress gene/name associations from the *DELETED_RSRC databases
+ select gfn.na_feature_id, gfn.name
+ from apidb.GeneFeatureName gfn,
+ sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr
+ where gfn.external_database_release_id = edr.external_database_release_id
+ and ed.external_database_id = edr.external_database_id
+ and ed.name like '%DELETED_RSRC'
+ ) n
+ where n.na_feature_id = gf.na_feature_id
+ and gf.external_database_release_id = r.external_database_release_id
+ and r.external_database_id = d.external_database_id
+ UNION
+ select dr.primary_identifier as id,
+ gf.source_id as gene,
+ 'AA feature DbRef primary ID' as union_member,
+ ed.name as database_name /* DbRef.primary_identifier mapped through DbRefAaFeature */
+ from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf,
+ dots.DbRefAaFeature draf, sres.DbRef dr,
+ sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+ where gf.na_feature_id = t.parent_id
+ and t.na_feature_id = taf.na_feature_id
+ and taf.aa_feature_id = draf.aa_feature_id
+ and draf.db_ref_id = dr.db_ref_id
+ and dr.external_database_release_id = edr.external_database_release_id
+ and edr.external_database_id = ed.external_database_id
+ and ed.name
+ not in ('INTERPRO', 'PFAM', 'PIRSF', 'PRODOM', 'PROSITEPROFILES',
+ 'SMART', 'SUPERFAMILY', 'TIGRFAM', 'CDD','HAMAP','HMMPANTHER',
+ 'PRINTS','SCANPROSITE','SFLD')
+ ) mapping,
+ dots.GeneFeature gf, dots.NaSequence ns
+ WHERE mapping.gene = gf.source_id
+ AND gf.na_sequence_id = ns.na_sequence_id
+ AND (ns.taxon_id::varchar = '&filterValue' or length('&filterValue') = 0)
+ AND (gf.is_predicted != 1 OR gf.is_predicted is null)
+ GROUP BY mapping.id, mapping.gene
]]>
-
-
-
- Citation info for proteomics datasets, used by GBrowse
-
-
' || sample || '' as sample_i
- FROM MSPeptideSummary mps, DatasetPresenter ds
- -- consider using the tuning table ExternalDbDatasetPresenter instead of the LIKE below, if its performance is a problem
- WHERE (ds.name = mps.external_database_name or mps.external_database_name like ds.dataset_name_pattern)
- ) t
- group by name, id
- )
- SELECT name,
- substr(description, 4000, 1) || ' Primary Contact Email: '|| coalesce(email, 'unavailable')
- || ' PMID: ' || publications || 'Samples:
'
- || sample_table || chr(10) ||
- ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation
- FROM (
- SELECT ds.name as name, ds.summary as description, pubs.contact_email as email,
- pubs.pmids as publications, samples.sample_table as sample_table
- FROM DatasetPresenter ds, pubs, samples
- WHERE ds.dataset_presenter_id = pubs.id
- AND ds.dataset_presenter_id = samples.id
+ INSERT INTO &prefixGeneId&1
+ (id, gene, unique_mapping, union_member, database_name)
+ WITH munge -- rows whose ID ends in a one- or two-digit ".N" suffix, with that suffix removed
+ AS (SELECT DISTINCT
+ regexp_replace(id, '\.\d\d?$', '') as id,
+ gene, unique_mapping, union_member, database_name
+ FROM &prefixGeneId&1
+ WHERE regexp_like(id, '(.*)\.\d\d?$')
+ )
+ SELECT id, gene, 0 as unique_mapping, 'base ID' as union_member, database_name
+ FROM munge -- insert a base ID only when it is not already present; NOT EXISTS stays correct even if the table holds a NULL id (NOT IN would then insert nothing)
+ WHERE NOT EXISTS (SELECT 1 FROM &prefixGeneId&1 g WHERE g.id = munge.id)
+ ]]>
+
+
+
-
-
-
+
+ -->
-
-
-
-
-
-
-
-
-
+ ]]>
+
+
+
+
+
+
-
-
-
-
-
-
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
-
-
-
-
-
-
-
-
-
-
-
+
+ This table maps IDs for a sequence onto the official ID of the sequence.
+ It is analogous to GeneId, which does the same thing for genes. Used by
+ genomic-sequence record queries, by the sequence retrieval tool, and by
+ the BasketFixer, which updates users' baskets at release time to replace
+ old IDs with updated ones.
+
+
+
+
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+ Stores (transcript, sequence, distance from centromere) 3-tuples for transcripts
+ that lie on a sequence for which we have a centromere location.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The BFMV for proteins. Each protein gets a single record, which
+ stores all its attributes. Used mainly to create TranscriptAttributes
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The BFMV for the gene record. Each gene gets a single record, which
+ stores all its attributes. Used widely, in the model and elsewhere, for
+ queries involving genes, as well as in the creation of more than a
+ dozen other tuning tables.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 10 )
+ WHERE ta.project_id = 'TriTrypDB'
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Stores, for each transcript, a string containing the gene-relative coordinates
+ of all its introns and UTRs.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A single product string per gene
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The BFMV for the gene record. Each gene gets a single record, which
+ stores all its attributes.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record represents one SNP. Widely used in the model, as well as in
+ the creation of several other tuning tables. Includes only NGS SNPs.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each row represents one EST. Used widely in the model, and to make the
+ tuning tables BlastTypes, OrganismAbbreviationBlast, and OrganismAttributes.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each row represents a colocated EST alignment - gene pair. Used by the
+ model, by generateGeneMetrics, and in the creation of the
+ OrganismAttributes tuning table
+
+
+
+
+
+
+
+
+
+
+
+
+ = 0
+ AND query_sequence.na_sequence_id = ba.query_na_sequence_id
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each row represents one cosmid or bac end feature; for use in JBrowse.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record captures info for a strain/protocol app node. Used in the model, including
+ gene and SNP queries, as well as the gene record.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record captures info for a strain/protocol app node. Used in the model, including
+ gene and SNP queries, as well as the gene record.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The BFMV for the WDK popset record. Widely used in the model for
+ queries related to popsets.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Used by the GenesByChipChip(Plasmo|Toxo) query, as well as by
+ generateGeneMetrics. Also an input to OrganismAttributes.
+
+
+
+
+
+
+ 0 */
+ CASE WHEN ta.is_reversed = 0
+ THEN ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)
+ ELSE ta.end_max - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)
+ END > 0
+ THEN
+ CASE
+ WHEN ta.is_reversed = 0
+ THEN '-'
+ ELSE '+'
+ END
+ ELSE
+ CASE
+ WHEN ta.is_reversed = 1
+ THEN '-'
+ ELSE '+'
+ END
+ END as direction,
+ sr.score1 as score
+ FROM TranscriptAttributes ta,
+ Results.segmentresult sr,
+ Study.StudyLink sl,
+ Study.Study s
+ WHERE sr.na_sequence_id = ta.na_sequence_id
+ AND s.study_id = sl.study_id
+ AND sl.protocol_app_node_id = sr.protocol_app_node_id
+ AND lower(s.name) like '%chip%peaks'
+ AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.start_min) <= 3000)
+ or (ta.is_reversed = 1 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.end_max) <= 3000) )
+ ]]>
+
+
+
+
+
+
+
+
+
+
+ Used by gene queries, as well as by generateGeneMetrics. Also an input
+ to OrganismAttributes.
+
+
+
+
+
+ 0 */
+ CASE WHEN ga.is_reversed = 0
+ THEN ga.start_min - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)
+ ELSE ga.end_max - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)
+ END > 0
+ THEN
+ CASE
+ WHEN ga.is_reversed = 0
+ THEN '-'
+ ELSE '+'
+ END
+ ELSE
+ CASE
+ WHEN ga.is_reversed = 1
+ THEN '-'
+ ELSE '+'
+ END
+ END as direction,
+ aef.*
+ FROM dots.BindingSiteFeature aef,
+ apidb.FeatureLocation arrloc,
+ GeneAttributes ga
+ WHERE aef.na_feature_id = arrloc.na_feature_id
+ AND arrloc.na_sequence_id = ga.na_sequence_id
+ AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000)
+ or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) )
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+ Each record maps a gene onto a subcellular location. Used by
+ GenesBySubcellularLocalization.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Like dots.SimilaritySpan, except that for sequences that are mapped by
+ SequencePiece into parts of other sequences, both locations are stored.
+ Used by GBrowse, and also in the creation of the Blastx tuning table.
+
+
+
+
+
+
+
+ = sim.max_query_end
+ AND sim.query_id = contig.na_sequence_id
+ AND sp.virtual_na_sequence_id = scaffold.na_sequence_id
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ SNP Chip only, such as Plasmo barcode, 3k_chp and hd_array
+
+ Each record represents one SNP. Widely used in the model, as well as in
+ the creation of several other tuning tables
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this otherwise-unneeded tuning table, which depends on SnpAttributesDoTS,
+ exists so that the view SnpChipAttributes can be created as a side-effect
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record stores a Blastp similarity of a gene. Used by the gene-page
+ Blastp table.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record stores a colocated (gene, popset) 2-tuple. Used by the
+ gene page as well as the PopsetByOverlap query.
+
+
+
+
+
+ sim.min_subject_start
+ AND sim.query_id = ia.na_sequence_id
+ GROUP BY ia.source_id, fl.feature_source_id
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+ Each record maps a taxon_id of interest onto the taxon_id of that taxon's
+ taxon-tree ancestor whose rank is "species". Used by
+ gene queries, and as an input in the
+ creation of several tuning tables, including GeneAttributes
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record stores a Blastx similarity. Used by GBrowse for the
+ match:WU_BLASTX track.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record stores the transcript sequence of one gene. Used by the
+ gene record and the sequence retrieval tool. Propagated to the portal.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record stores the coding sequence of one gene. Used by the
+ gene record and the sequence retrieval tool. Propagated to the portal.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record stores the coding sequence of one gene. Used by the
+ gene record and the sequence retrieval tool, as well as by
+ buildTrackOldAnnotationTT. Propagated to the portal.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record stores the nucleotide sequence for one genomic sequence
+ that is "official" (in the sense that it can be instantiated as a WDK
+ sequence record). Used by generatePathoLogicFile and the sequence
+ retrieval tool. Propagated to portal instances.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record stores the nucleotide sequence of an EST, for use by the
+ relevant attribute query in the WDK EST record. Propagated to portal
+ instances.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Mapping table of experiment and sample names to junction protocol_app_node_id
+
+
+
+
+
+
+
+
+
+
+
+ = 1
+ GROUP BY protocol_app_node_id
+ ), part AS (
+ SELECT
+ ij.junctions_pan_id, ij.avg_value, stats.multiplier
+ , max(ij.expression_pan_id) OVER w as max_exp_pan_id
+ , max(ij.sample_name) OVER w as max_sample_Name
+ , max(ij.exp_name) OVER w as max_exp_name
+ FROM ij, stats
+ WHERE ij.junctions_pan_id = stats.protocol_app_node_id
+ WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
+ )
+ SELECT DISTINCT * FROM (
+ SELECT junctions_pan_id
+ , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
+ , first_value(max_sample_name) OVER w1 as sample_name
+ , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
+ , multiplier
+ FROM part
+ WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
+ ) t
+ ORDER BY junctions_pan_id
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Collects one row per intron junction (all junction records with the same start, end, and strand are treated as one junction). Statistics are generated, including percentages of the max intron score and ratios vs. expression, at an overall level.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 500000 THEN 500000 ELSE step_mult END as seq_step_mult
+ FROM (
+ SELECT gs.na_sequence_id, gs.length, gs.taxon_id, 25000 * (1 + floor(gs.length/count(*))) as step_mult
+ FROM apidb.intronjunction ij, dots.nasequence gs
+ WHERE gs.na_sequence_id = ij.na_sequence_id
+ GROUP BY gs.na_sequence_id, gs.length, gs.taxon_id
+ ) t
+ ORDER BY taxon_id
+ )
+ LOOP
+ iter_length := idlist.seq_step_mult;
+ i_first_pos := 1;
+ i_last_pos := i_first_pos + iter_length;
+ WHILE i_first_pos < idlist.length
+ LOOP
+ INSERT INTO GIJtmp
+ SELECT DISTINCT
+ junc.*,
+ CASE
+ WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed
+ THEN 1
+ ELSE 0
+ END as matches_gene_strand,
+ last_value(ga.source_id) over w1 as gene_source_id,
+ last_value(ga.na_feature_id) over w1 as gene_na_feature_id,
+ CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron
+ FROM (
+ SELECT ij.na_sequence_id,seq.source_id as sequence_source_id,ij.segment_start,ij.segment_end,
+ sum(ij.unique_reads) as total_unique, round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm,
+ ij.is_reversed,seq.source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id
+ FROM apidb.intronjunction ij, namemappinggij je, dots.nasequence seq
+ WHERE ij.na_sequence_id = idlist.na_sequence_id
+ AND ij.segment_start between i_first_pos and i_last_pos
+ AND ij.na_sequence_id = seq.na_sequence_id
+ AND ij.unique_reads >= 1
+ AND je.junctions_pan_id = ij.protocol_app_node_id
+ AND je.multiplier < 20
+ GROUP BY ij.na_sequence_id,ij.segment_start,ij.segment_end, ij.is_reversed,seq.source_id
+ ) junc
+ LEFT JOIN GeneIdLocGIJ&1 ga ON
+ junc.na_sequence_id = ga.na_sequence_id
+ AND junc.segment_start >= ga.start_min
+ AND junc.segment_end <= ga.end_max
+ AND junc.is_reversed = ga.is_reversed
+ LEFT JOIN annotgij ag ON
+ junc.na_sequence_id = ag.na_sequence_id
+ AND junc.segment_start = ag.start_min
+ AND junc.segment_end = ag.end_max
+ AND junc.is_reversed = ag.is_reversed
+ WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron')
+ WINDOW w1 AS (
+ PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type
+ ORDER BY ga.total_expression ASC
+ ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+ )
+ ;
+ commit;
+ i_first_pos := i_last_pos + 1;
+ i_last_pos := i_first_pos + iter_length;
+ END LOOP;
+ END LOOP;
+ END;
+ $$ LANGUAGE PLPGSQL;
+ ]]>
+
+
+
+
+
+ 0 THEN round((junc.total_isrpm / maxv.max_isrpm) * 100,2) ELSE null END as percent_max,
+ CASE WHEN maxv.gene_source_id is not null THEN 1 ELSE 0 END as contained,
+ CAST (null as numeric(10)) as taxon_id,
+ cast (null as numeric(10)) as upstream_gene_id,
+ cast (null as numeric) as upstream_distance,
+ cast (null as numeric(10)) as downstream_gene_id,
+ cast (null as numeric) as downstream_distance
+ FROM
+ gijtmp junc LEFT JOIN
+ (
+ SELECT gene_source_id,max(total_unique) as max_unique, max(total_isrpm) as max_isrpm
+ FROM gijtmp
+ WHERE gene_source_id is not null
+ GROUP BY gene_source_id
+ ) maxv ON junc.gene_source_id = maxv.gene_source_id
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Stores statistics for annotated introns used for configuring JBrowse tracks based on organism.
+
+
+
+
+
+
+
+
+
+
+
+ Stores maximum values per gene for each sample so percent max intron can be computed for sample table.
+
+
+
+
+
+
+
+
+ = j.segment_end
+ AND ga.is_reversed = j.is_reversed
+ AND j.protocol_app_node_id = mult.junctions_pan_id
+ GROUP BY j.protocol_app_node_id, ga.source_id
+ );
+ commit;
+ END LOOP;
+ END;
+ $$ LANGUAGE PLPGSQL;
+ ]]>
+
+
+
+
+
+
+
+ Each record stores the nucleotide sequence of one popset. Used in the
+ relevant attribute query of the WDK popset record, as well as by
+ PopsetClustalOmega. Propagated to portal instances.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record maps an organism name onto an abbreviation, getting the
+ pair either from apidb.Organism or (in the case of Tvag) hardwired
+ into the SQL below. This table will eventually be replaced by workflow.
+
+
+
+
+
+
+
+
+
+
+ Stores special webservice abbreviations which are not standard organism
+ names. Each record maps an organism name onto this abbreviation, as
+ well as the species name and project ID. Used by the model and as an
+ input in the creation of the OrganismAbbreviationBlast tuning table.
+ Propagated to portal instances.
+
+
+
+
+
+
+
+
+
+
+ Groups projects by higher level taxonomy. Used in the creation of the
+ OldOrganismTree tuning table.
+
+
+
+
+
+
+
+
+
+ Group species by higher level taxonomy. Each row associates a taxon of
+ interest with one of its ancestors in the taxon tree. Used in parameter
+ queries that have to know about the taxon tree, as well as
+ apidb.project_id(), the function that maps an organism name to a
+ project. Propagated to portal instances.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Group species by higher level taxonomy. Each row associates a taxon of
+ interest with one of its ancestors in the taxon tree. Used in parameter
+ queries that have to know about the taxon tree. Propagated to portal
+ instances.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record maps an organism to its BLAST abbreviation. Used by
+ BLAST-query parameters. Propagated to portal instances.
+
+
+
+
+
+
+
+
+
+
+
+
+ For each project, show which BLAST databases are available for which
+ species. Used in BLAST param queries. Propagated to portal instances.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Properties table for ChEBI Compounds.
+
+
+
+
+
+
+
+
+
+
+
+ The BFMV for the compound WDK record type. Used by the model for the
+ compound record and queries, as well as in the creation of the
+ PathwayCompounds tuning table. Propagated to portal instances.
+ Note: children of ChEBI compounds are excluded, but data of these is gathered in the (parent) entries.
+
+
+
+
+
+
+
+
+
+
+
+
+ Alias table for Compounds.
+ Compound column is the source_id from CompoundAttributes (chEBI_ID).
+ ID can have this same ID, or mapping KEGG ID, or Name, or Synonym.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Each record represents a 5-tuple of (reaction, compound, pathway,
+ enzyme, type). Used extensively in the model for pathway-related
+ queries, as well as by getImageMap.pl.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Aggregates reactions irrespective of pathway. Required to determine if BioCyc reactions are reversible. Used extensively in the model in conjunction with pathwaycompounds for pathway related queries
+
+
+
+
+
+
+
+
+
+ ' || o.enzyme || '' ELSE o.enzyme END as expasy_html
+ FROM (
+ SELECT i.*
+ , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-'
+ THEN -- ExPASy search URL; an EC level that is NULL adds no field parameter (testing with IS NULL, since "CASE x WHEN null" never matches and a NULL operand would make the whole concatenation NULL)
+ 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1='
+ || ec.ec_number_1
+ || CASE WHEN ec.ec_number_2 IS NULL THEN '' ELSE chr(38) || 'field2=' || ec.ec_number_2 END
+ || CASE WHEN ec.ec_number_3 IS NULL THEN '' ELSE chr(38) || 'field3=' || ec.ec_number_3 END
+ || CASE WHEN ec.ec_number_4 IS NULL THEN '' ELSE chr(38) || 'field4=' || ec.ec_number_4 END
+ ELSE reaction_url END as expasy_url
+ , ec.description as enzyme_description
+ FROM (
+ SELECT
+ reaction_id
+ , reaction_source_id
+ , reaction_url
+ , ext_db_name
+ , ext_db_version
+ , enzyme
+ , substrates_html || ' ' || sign || ' ' || products_html as equation_html
+ , substrates_text || ' ' || sign || ' ' || products_text as equation_text
+ , case when sign = '<=>' then 1 else 0 end as is_reversible
+ , substrates_text
+ , products_text
+ FROM (
+ SELECT
+ reaction_id
+ , reaction_source_id
+ , reaction_url
+ , ext_db_name
+ , ext_db_version
+ , enzyme
+ , (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like '%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign
+ , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html
+ , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text
+ , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html
+ , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text
+ FROM (
+ WITH rep AS (
+ SELECT DISTINCT
+ pr.PATHWAY_REACTION_ID as reaction_id
+ , pr.SOURCE_ID as reaction_source_id
+ , pn.DISPLAY_LABEL as enzyme
+ , coalesce(ca.compound_name, pc.compound_source_id) as compound
+ , prel.is_reversible as is_reversible_og
+ , last_value(prel.is_reversible) OVER (partition by pr.pathway_reaction_id ORDER BY prel.is_reversible ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) as is_reversible
+ , first_value(pc.type) over (partition by pr.pathway_reaction_id, pr.SOURCE_ID, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE, coalesce(pc.chebi_accession, pc.compound_source_id), coalesce(ca.compound_name, pc.compound_source_id) ORDER BY pc.pathway_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as type
+ FROM
+ sres.pathway p
+ , apidb.pathwayreaction pr
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYNODE pn
+ , SRES.PATHWAYRELATIONSHIP prel
+ , SRES.ONTOLOGYTERM ot
+ , PathwayCompounds pc
+ LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id
+ WHERE p.PATHWAY_ID = prr.PATHWAY_ID
+ AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND prel.NODE_ID = pn.PATHWAY_NODE_ID
+ AND ot.name = 'enzyme'
+ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
+ AND pc.PATHWAY_ID = p.PATHWAY_ID
+ AND pc.REACTION_id = pr.PATHWAY_REACTION_ID
+ )
+ SELECT DISTINCT
+ pr.PATHWAY_REACTION_ID as reaction_id
+ , pr.SOURCE_ID as reaction_source_id
+ , ed.NAME as ext_db_name
+ , edr.VERSION as ext_db_version
+ , cast(pn.DISPLAY_LABEL as varchar(20)) as enzyme
+ , min(rep.is_reversible) as is_reversible
+ , min(rep.type) as type
+ , string_agg (pc.type, ',' order by p.pathway_id) as type_list
+ , coalesce(ca.compound_name, pc.compound_source_id) as compound
+ , CASE
+ WHEN coalesce(pc.CHEBI_ACCESSION, pc.compound_source_id) LIKE 'CHEBI%'
+ THEN '' || coalesce(ca.compound_name, pc.compound_source_id) || ''
+ ELSE coalesce(pc.chebi_accession, pc.compound_source_id)
+ END as compound_url
+ , CASE (replace (replace (ed.name, 'Pathways_', ''), '_RSRC', '')) -- pick the reaction-page URL by database name with 'Pathways_' and '_RSRC' removed; names not listed yield NULL
+ WHEN 'KEGG' THEN 'https://www.genome.jp/dbget-bin/www_bget?rn:' || pr.source_id
+ WHEN 'MetaCyc' THEN 'https://metacyc.org/META/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id
+ WHEN 'TrypanoCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/TRYPANO/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id
+ WHEN 'LeishCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/LEISH/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id
+ WHEN 'FungiCyc' THEN NULL
+ END as reaction_url
+ FROM
+ sres.pathway p
+ , apidb.pathwayreaction pr
+ , APIDB.PATHWAYREACTIONREL prr
+ , SRES.PATHWAYNODE pn
+ , SRES.PATHWAYRELATIONSHIP prel
+ , SRES.EXTERNALDATABASE ed
+ , SRES.EXTERNALDATABASERELEASE edr
+ , SRES.ONTOLOGYTERM ot
+ , rep
+ , PathwayCompounds pc
+ LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id
+ WHERE p.PATHWAY_ID = prr.PATHWAY_ID
+ AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID
+ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID
+ AND prel.NODE_ID = pn.PATHWAY_NODE_ID
+ AND ot.name = 'enzyme'
+ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID
+ AND pc.EXT_DB_NAME = ed.NAME
+ AND pc.EXT_DB_VERSION = edr.VERSION
+ AND ed.EXTERNAL_DATABASE_ID = edr.EXTERNAL_DATABASE_ID
+ AND pc.PATHWAY_ID = p.PATHWAY_ID
+ AND pc.REACTION_id = pr.PATHWAY_REACTION_ID
+ AND rep.reaction_id = pr.pathway_reaction_id
+ AND rep.reaction_source_id = pr.source_id
+ AND rep.compound = coalesce(ca.compound_name, pc.compound_source_id)
+ AND rep.enzyme = pn.DISPLAY_LABEL
+ AND rep.is_reversible_og = prel.is_reversible
+ GROUP BY pr.pathway_reaction_id, pr.SOURCE_ID, ed.NAME, edr.VERSION, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE
+ , coalesce(pc.chebi_accession, pc.compound_source_id)
+ , coalesce(ca.compound_name, pc.compound_source_id)
+ ) t1
+ GROUP BY reaction_id, reaction_source_id, reaction_url, ext_db_name, ext_db_version, enzyme, is_reversible
+ ) t2
+ ) i
+ LEFT OUTER JOIN sres.enzymeclass ec ON i.enzyme = ec.ec_number
+ ) o
+ ]]>
+
+
+
+
+
+
+
+
+ Nodes and edges for pathway maps
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1
+ )
+ SELECT aee.e_id, pn.*
+ FROM pn
+ , AllEnzymeEdges aee
+ WHERE aee.all_edges = pn.all_edges
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Used by pathway table on gene pages
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ for use in GenesByReactionCompounds question
+
+
+
+
+
+
+
+
+
+ Each row stores mass-spec. based expression evidence for one sample of
+ one experiment for one gene. Used for mass spec queries in the model,
+ GBrowse, and PBrowse, and also in the creation of the MSTranscriptSummary
+ tuning table.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Stores summary information from annotated genomes to facilitate overview section of gene page
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Mass-spec experiment results for a transcript. Used in the model for queries
+ related to transcripts.
+
+
+
+
+
+
+
+
+
+
+
+
+ Mass-spec experiment results for a peptide. Used by the model, GBrowse,
+ and PBrowse.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Data from the Seattle Structural Genomics Center for Infectious Disease,
+ populated from their web service. Used in the gene record.
+
+
+
+
+
+
+
+ Used by the model and GBrowse, as well as an input in the creation of
+ the tuning tables like MSModifiedPeptideSummary and MSPeptideSummary.
+
+
+
+
+
+
+
+ Used by the model when writing profile data
+
+
+
+
+
+
+
+ Associates an organism with the GBrowse and PBrowse tracks available
+ for it. Used by the gene record.
+
+
+
+
+
+
+
+
+ Associates an organism with the GBrowse and PBrowse tracks available
+ for it. Used by the gene record.
+
+
+
+
+
+
+
+
+ Each row maps a dataset onto an ID for which the dataset contains data;
+ each dataset gets one such row.
+ Used in dataset record queries.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Citation info for proteomics datasets, used by GBrowse
+
+
+
+ ' || sample || '' as sample_i
+ FROM MSPeptideSummary mps, DatasetPresenter ds
+ -- consider using the tuning table ExternalDbDatasetPresenter instead of the LIKE below, if its performance is a problem
+ WHERE (ds.name = mps.external_database_name or mps.external_database_name like ds.dataset_name_pattern)
+ ) t
+ group by name, id
+ )
+ /* Build one citation string per DatasetPresenter row that has both a matching pubs row and a matching samples row. */
+ SELECT name,
+ /* substr takes (string, start, length), so this keeps the first 4000 characters of the summary */
+ substr(description, 1, 4000) || ' Primary Contact Email: '|| coalesce(email, 'unavailable')
+ || ' PMID: ' || publications || 'Samples:
'
+ || sample_table || chr(10) ||
+ ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation
+ FROM (
+ SELECT ds.name as name, ds.summary as description, pubs.contact_email as email, pubs.pmids as publications, samples.sample_table as sample_table
+ FROM DatasetPresenter ds, pubs, samples
+ WHERE ds.dataset_presenter_id = pubs.id AND ds.dataset_presenter_id = samples.id
+ ) t
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+ = 1.5
+ ) t
+ GROUP BY gene_source_id, project_id, sequence_id, haplotype_block_name,
+ start_min, end_max, start_max, end_min, organism
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ = commit_after THEN
- COMMIT;
- ctrows := 0;
- END IF;
- END LOOP;
- commit;
- END;
- $$ LANGUAGE PLPGSQL;
+ LOOP
+ ctrows := ctrows + 1;
+ INSERT INTO Profile&1
+ (DATASET_NAME, DATASET_TYPE, DATASET_SUBTYPE, PROFILE_TYPE, NODE_TYPE, SOURCE_ID, PROFILE_STUDY_ID, PROFILE_SET_NAME,
+ PROFILE_SET_SUFFIX, PROFILE_AS_STRING, MAX_VALUE, MIN_VALUE, MAX_TIMEPOINT, MIN_TIMEPOINT)
+ VALUES
+ (pf_rows.DATASET_NAME, pf_rows.DATASET_TYPE, pf_rows.DATASET_SUBTYPE, pf_rows.PROFILE_TYPE, pf_rows.NODE_TYPE, pf_rows.SOURCE_ID, pf_rows.PROFILE_STUDY_ID, pf_rows.PROFILE_SET_NAME,
+ pf_rows.PROFILE_SET_SUFFIX, pf_rows.PROFILE_AS_STRING, pf_rows.MAX_VALUE, pf_rows.MIN_VALUE, pf_rows.MAX_TIMEPOINT, pf_rows.MIN_TIMEPOINT);
+ IF ctrows >= commit_after THEN
+ COMMIT;
+ ctrows := 0;
+ END IF;
+ END LOOP;
+ commit;
+ END;
+ $$ LANGUAGE PLPGSQL;
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ For all datasets, list all genes (source_id) of a gene_group where one of them (profile_graph_id)
+ has data for a profile_set.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ for gene-page expression graphs
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ For each distinct organism in GeneAttributes, all ancestors in the taxon tree. For the gene page.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Materialization of GeneTables.GeneModelDump.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Distinct filter_name for gene query summary. For the initial version,
+ at least, it's (GeneAttributes.species UNION GeneAttributes.organism).
+ Note that the UNION implies set behavior and therefore distinctness.
+
+
+
+
- ]]>
+
+
+
+ Info from dots.ExternalAaSequence records for BLAT alignments
+
+
+
+
+
+
+
+
+ Materialization of the orthology transform. Also useful for GeneTables.Orthologs.
+
+
+
+
+
+
+ CREATE UNLOGGED TABLE SyntenicPairs AS
+ /* Distinct (na_feature_id, syn_na_feature_id) pairs where a SyntenicGene span overlaps a GeneAttributes span on the same na_sequence_id. */
+ SELECT DISTINCT ga.na_feature_id, sg.syn_na_feature_id
+ FROM GeneAttributes ga
+ INNER JOIN apidb.SyntenicGene sg ON ga.na_sequence_id = sg.na_sequence_id
+ WHERE ga.start_min <= sg.end_max AND ga.end_max >= sg.start_min
+ ]]>
-
+
+
+
+
+
+
+
+
-
+ ]]>
+
+
+
+ For each RNA-Seq Dataset, compute the top 500 ratios of max/min gene expression.
+
+
+
+
+
+
+
-
-
-
-
-
-
-
+
+
+
+
+
+
+ for GeneTables.RodMalPhenotypeTable
+
+
+
+
+
+
+
+ results.source_id
+ OR knockdown.source_id is null
+ ORDER BY results.source_id, results.rmgmid
]]>
-
- For all datasets, list all genes (source_id) of a gene_group where one of them (profile_graph_id)
- has data for a profile_set.
-
-
-
-
+
+ Chromosome data for CNV queries
+
+
+
-
+
-
+ ]]>
+
+
+
-
-
- for gene-page expression graphs
-
-
-
-
-
-
-
+
+
+ Gene Data for CNV queries
+
+
+
-
+ CREATE TABLE GeneCopyNumbers&1 AS
+ /* Distinct copy-number rows joined to TranscriptAttributes and PANIo, limited to the two protein-coding gene_type spellings. */
+ /* strain is pan.name with the part matching the regex removed. haploid_number is 0 below 0.01, 1 strictly between 0.01 and 1.85, else the rounded estimate. */
+ SELECT DISTINCT
+ ta.project_id,
+ ta.source_id,
+ ta.gene_source_id,
+ REGEXP_REPLACE(pan.name, '_[A-Za-z0-9]+ (.+)$', '') AS strain,
+ gcn.haploid_number AS raw_estimate,
+ gcn.ref_copy_number AS ref_cn,
+ CASE WHEN gcn.haploid_number < 0.01 THEN 0
+ WHEN gcn.haploid_number > 0.01 AND gcn.haploid_number < 1.85 THEN 1
+ ELSE round(gcn.haploid_number) END AS haploid_number,
+ ta.chromosome,
+ ta.na_sequence_id,
+ io.input_pan_id,
+ io.output_pan_id
+ FROM apidb.genecopynumber gcn
+ INNER JOIN study.protocolappnode pan ON pan.protocol_app_node_id = gcn.protocol_app_node_id
+ INNER JOIN PANIo io ON io.output_pan_id = gcn.protocol_app_node_id
+ INNER JOIN TranscriptAttributes ta ON ta.gene_na_feature_id = gcn.na_feature_id
+ WHERE ta.gene_type IN ('protein coding', 'protein coding gene')
+ ]]>
+ ]]>
@@ -2586,6 +8634,90 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id)
+
+ for TranscriptAttributes.InterproColumns
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
annotation updates from Apollo
@@ -2595,6 +8727,37 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id)
+
+ semicolon-delimited list of formatted genomic locations for each gene
+
+
+
+
+
+
+
+
+
@@ -2665,18 +8828,13 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id)
-
-
-
+
+
+
+
+
+ One phylogenetic-profile string per ortholog group
+
+
+
+
+
+
+
+
+
+
@@ -2959,6 +9145,252 @@ sub readClob {
+
+
+ Links AlphaFold entries to gene ids where Uniprot ids are directly assigned
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Stores intron support for introns
+
+
+
+
+
+
+
+ = CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= 2 /*stats.perc0005_annot_percent_max*/)
+ ) t
+ GROUP BY gene_source_id, ontology_term, intron_count
+ ) t
+ WHERE string_value = 'All'
+
+ UNION
+
+ SELECT gene_source_id
+ , ontology_term
+ , case when count(*) = intron_count THEN 'All-high'
+ when count(*) = 0 THEN 'None'
+ else 'Any-high' end as string_value
+ FROM (
+ SELECT gij.gene_source_id
+ , 'intron_junction' as ontology_term
+ , intronCount.intron_count
+ FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+ , (SELECT count (*) as intron_count, source_id FROM apidbtuning.genemodeldump WHERE type = 'Intron' GROUP BY source_id) intronCount
+ WHERE gij.gene_source_id = intronCount.source_id
+ AND gij.na_sequence_id = stats.na_sequence_id
+ AND gij.annotated_intron = 'Yes'
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= 2)
+ ) t2
+ GROUP BY gene_source_id, ontology_term, intron_count
+
+ UNION
+
+ SELECT gene_source_id
+ , ontology_term
+ , CASE WHEN count(*) = intron_count THEN 'All-low'
+ WHEN count(*) = 0 THEN 'None'
+ ELSE 'Any-low' END as string_value
+ FROM (
+ SELECT gij.gene_source_id
+ , 'intron_junction' as ontology_term
+ , intronCount.intron_count
+ FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+ , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
+ WHERE gij.gene_source_id = intronCount.source_id
+ AND gij.na_sequence_id = stats.na_sequence_id
+ AND gij.annotated_intron = 'Yes'
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max)
+ AND gij.intron_feature_id not in (
+ SELECT gij.intron_feature_id
+ FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats
+ WHERE gij.na_sequence_id = stats.na_sequence_id
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= 2)
+ )
+ ) t3
+ GROUP BY gene_source_id, ontology_term, intron_count
+
+ UNION
+
+ SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-low') as string_value
+ FROM (
+ SELECT gene_source_id
+ , ontology_term
+ , case when count(*) = intron_count THEN 'All'
+ when count(*) = 0 THEN 'None'
+ else 'Any' end as string_value
+ FROM (
+ SELECT gij.gene_source_id
+ , 'intron_junction' as ontology_term
+ , intronCount.intron_count
+ FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+ , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
+ WHERE gij.gene_source_id = intronCount.source_id
+ AND gij.na_sequence_id = stats.na_sequence_id
+ AND gij.annotated_intron = 'Yes'
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max)
+ AND gij.intron_feature_id not in (
+ SELECT gij.intron_feature_id
+ FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats
+ WHERE gij.na_sequence_id = stats.na_sequence_id
+ AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+ AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+ AND (gij.contained = 0 or gij.percent_max >= 2)
+ )
+ ) t
+ GROUP BY gene_source_id, ontology_term, intron_count
+ ) t4
+ WHERE string_value = 'All'
+ ) t
+]]>
+
+
+
all products for each gene
From aa89ddb6547c7c8d168e87e34fdc6bfe7c2374d4 Mon Sep 17 00:00:00 2001
From: Richard Demko
Date: Mon, 9 Jun 2025 12:07:33 -0400
Subject: [PATCH 106/112] Resolving syntax error
---
.../lib/psql/webready/comparative/GeneOrthologGroup.psql | 8 +++-----
.../webready/comparative/TranscriptOrthologGroup.psql | 8 +++-----
2 files changed, 6 insertions(+), 10 deletions(-)
diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
index 4c30361e4d..404c23f4dc 100644
--- a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
+++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
@@ -1,10 +1,8 @@
create table :SCHEMA.GeneOrthologGroup as
SELECT pa.gene_source_id AS gene_id,
- ogas.group_id,
+ ogas.group_id
FROM webready.proteinattributes pa,
apidb.orthologgroupaasequence ogas
- WHERE pa.aa_sequence_id = ogas.aa_sequence_id
-;
+ WHERE pa.aa_sequence_id = ogas.aa_sequence_id;
alter table :SCHEMA.GeneOrthologGroup
- add constraint GeneOrthologGroup_pk primary key (gene_id)
-;
+ add constraint GeneOrthologGroup_pk primary key (gene_id);
diff --git a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
index 5362c8465e..d6fd747109 100644
--- a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
+++ b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
@@ -1,10 +1,8 @@
create table :SCHEMA.TranscriptOrthologGroup as
SELECT ta.source_id AS source_id,
- ogas.group_id,
+ ogas.group_id
FROM webready.transcriptattributes ta,
apidb.orthologgroupaasequence ogas
- WHERE ta.aa_sequence_id = ogas.aa_sequence_id
-;
+ WHERE ta.aa_sequence_id = ogas.aa_sequence_id;
alter table :SCHEMA.TranscriptOrthologGroup
- add constraint TranscriptOrthologGroup_pk primary key (source_id)
-;
+ add constraint TranscriptOrthologGroup_pk primary key (source_id);
From 758fa2d2bae166c388a89a4ca49b251158e35cc5 Mon Sep 17 00:00:00 2001
From: Richard Demko
Date: Mon, 9 Jun 2025 12:20:42 -0400
Subject: [PATCH 107/112] Resolving duplicate row issue
---
Model/lib/psql/webready/comparative/GeneOrthologGroup.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
index 404c23f4dc..a02ec0a2f2 100644
--- a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
+++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
@@ -1,5 +1,5 @@
create table :SCHEMA.GeneOrthologGroup as
- SELECT pa.gene_source_id AS gene_id,
+ SELECT distinct(pa.gene_source_id) AS gene_id,
ogas.group_id
FROM webready.proteinattributes pa,
apidb.orthologgroupaasequence ogas
From 4b24d0649417f22d4c8e52bf5c37c0778ce7bba4 Mon Sep 17 00:00:00 2001
From: Richard Demko
Date: Mon, 9 Jun 2025 12:57:16 -0400
Subject: [PATCH 108/112] Resolving primary key issue
---
Model/lib/psql/webready/comparative/GeneOrthologGroup.psql | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
index a02ec0a2f2..d2f229f099 100644
--- a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
+++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
@@ -1,8 +1,9 @@
- create table :SCHEMA.GeneOrthologGroup as
- SELECT distinct(pa.gene_source_id) AS gene_id,
+ create table :SCHEMA.GeneOrthologGroup as
+ SELECT DISTINCT pa.gene_source_id AS gene_id,
ogas.group_id
FROM webready.proteinattributes pa,
apidb.orthologgroupaasequence ogas
WHERE pa.aa_sequence_id = ogas.aa_sequence_id;
alter table :SCHEMA.GeneOrthologGroup
- add constraint GeneOrthologGroup_pk primary key (gene_id);
+ ADD CONSTRAINT GeneOrthologGroup_pk PRIMARY KEY (gene_id, group_id);
+
From d25cdba6584bffb723d8374cf9f835d3ef4fe42e Mon Sep 17 00:00:00 2001
From: Richard Demko
Date: Mon, 9 Jun 2025 14:43:37 -0400
Subject: [PATCH 109/112] Adding distinct full_id to proteinSequenceGroup
---
Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql b/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql
index bfdd39fcc1..de2df5a828 100644
--- a/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql
+++ b/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql
@@ -1,6 +1,6 @@
create table :SCHEMA.ProteinSequenceGroup as
SELECT
- aas.source_id AS full_id,
+ distinct(aas.source_id) AS full_id,
aas.source_id,
aas.aa_sequence_id,
length(aas.sequence) as length,
From e20cbdb5a61bca2102f101594356b55e981f2c9e Mon Sep 17 00:00:00 2001
From: Sufen Hu
Date: Mon, 16 Jun 2025 11:56:18 -0400
Subject: [PATCH 110/112] Avoid hardcoding webready; use :SCHEMA for better
flexibility
---
Model/lib/psql/webready/comparative/GeneOrthologGroup.psql | 2 +-
.../lib/psql/webready/comparative/TranscriptOrthologGroup.psql | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
index d2f229f099..1f895fde29 100644
--- a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
+++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
@@ -1,7 +1,7 @@
create table :SCHEMA.GeneOrthologGroup as
SELECT DISTINCT pa.gene_source_id AS gene_id,
ogas.group_id
- FROM webready.proteinattributes pa,
+ FROM :SCHEMA.proteinattributes pa,
apidb.orthologgroupaasequence ogas
WHERE pa.aa_sequence_id = ogas.aa_sequence_id;
alter table :SCHEMA.GeneOrthologGroup
diff --git a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
index d6fd747109..fcb9f3a012 100644
--- a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
+++ b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
@@ -1,7 +1,7 @@
create table :SCHEMA.TranscriptOrthologGroup as
SELECT ta.source_id AS source_id,
ogas.group_id
- FROM webready.transcriptattributes ta,
+ FROM :SCHEMA.transcriptattributes ta,
apidb.orthologgroupaasequence ogas
WHERE ta.aa_sequence_id = ogas.aa_sequence_id;
alter table :SCHEMA.TranscriptOrthologGroup
From 6af3a1bf5e7b6134c02bcc59517b25b0cf12eb82 Mon Sep 17 00:00:00 2001
From: steve-fischer-200
Date: Wed, 25 Jun 2025 13:51:15 -0400
Subject: [PATCH 111/112] use PG remote schemas
---
.../tuningManager/apiTuningManager-pruned.xml | 53 +++++++++----------
1 file changed, 24 insertions(+), 29 deletions(-)
diff --git a/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml b/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml
index 2dd94f6212..587655a7df 100644
--- a/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml
+++ b/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml
@@ -303,18 +303,14 @@
@@ -2825,23 +2821,22 @@ sub readClob {
, au.creationdate as creation_date
, au.apolloevidencecode as evidence_code
, au.apolloevidencecodeassignment as evidence_code_parameter
- , 'TODO add owner details' as apollo_owner_details
- --, (select firstname.value || ' ' || lastname.value || ', ' || address.value from
- -- (select uap.value
- -- from useraccounts.accounts@acctdbn.profile uaa, useraccounts.account_properties@acctdbn.profile uap
- -- where uaa.user_id = uap.user_id
- -- and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1)
- -- and uap.key = 'first_name') firstname,
- -- (select uap.value
- -- from useraccounts.accounts@acctdbn.profile uaa, useraccounts.account_properties@acctdbn.profile uap
- -- where uaa.user_id = uap.user_id
- -- and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1)
- -- and uap.key = 'last_name') lastname,
- -- (select uap.value
- -- from useraccounts.accounts@acctdbn.profile uaa, useraccounts.account_properties@acctdbn.profile uap
- -- where uaa.user_id = uap.user_id
- -- and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1)
- -- and uap.key = 'organization') address) as apollo_owner_details
+ , (select firstname.value || ' ' || lastname.value || ', ' || address.value from
+ (select uap.value
+ from &remote_useraccounts_schema.accounts.profile uaa, &remote_useraccounts_schema.account_properties.profile uap
+ where uaa.user_id = uap.user_id
+ and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1)
+ and uap.key = 'first_name') firstname,
+ (select uap.value
+ from &remote_useraccounts_schema.accounts.profile uaa, &remote_useraccounts_schema.account_properties.profile uap
+ where uaa.user_id = uap.user_id
+ and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1)
+ and uap.key = 'last_name') lastname,
+ (select uap.value
+ from &remote_useraccounts_schema.accounts.profile uaa, &remote_useraccounts_schema.account_properties.profile uap
+ where uaa.user_id = uap.user_id
+ and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1)
+ and uap.key = 'organization') address) as apollo_owner_details
FROM ApolloUpdate au, GeneAttributes ga, TranscriptAttributes ta
WHERE au.type = 'gene'
AND (au.attr like '%gene_product=%'
From 3d014bcedb7743bbc072d3971b5c3d43ebead428 Mon Sep 17 00:00:00 2001
From: John Brestelli <39096257+jbrestel@users.noreply.github.com>
Date: Thu, 17 Jul 2025 15:04:47 -0400
Subject: [PATCH 112/112] JB KC Webtables (#86)
* remove unused filter
* add GeneProduct table
* only delete for this org
* add splicesitetranscript
* First pass at PathwayEC global table
* First pass at transcript universe table
* Add some pathway details to PathwayEc
* Add partitioning columns and types, remove old code
* Remove old tuning manager code, create empty TranscriptPathway table
* Load into TranscriptEC and TranscriptPathway tables
* File renamed
* Use apidb.organism for looping
* Use a different delimiter internally in PLPGSQL blocks
* Fix SQL error
* move genemaxintrongij to genemaxjunction
* subset of Junction Tables
* move GeneIntJuncStats to GeneJunctionStats
* fix up genejunctionstats
* rename genejunction stats as this table is not about genes
* fix name
* Add missing table
* Add missing alias
* Fix issue with EC number positions and add timestamp
* PSQL for undoing TranscriptPathway
* add few indexes for transcriptpathway tables
* minor changes to hit partitions
* wip; moved some comparative things around
* remove GroupPhylogeneticProfile as it is now circular. other small changes
* add custom undo script for org specific genomics ortholog tables
* refactor intronsupportlevel
* new name for junction stats table
* Remove PathwayNodeGene SQLs, we don't need this table
* Fix garbled comments
* Load into existing partitioned table instead of creating
* Add SQL to create empty partitioned table and to truncate on undo
* low conf IntronJunction Support level should be inclusive
---------
Co-authored-by: Kathryn Crouch
---
.../comparative/GeneOrthologGroup.psql | 9 -
.../comparative/GroupPhylogeneticProfile.psql | 14 -
.../GroupPhylogeneticProfile_ix.psql | 3 -
.../comparative/LoadOrthologTables.psql | 29 ++
.../comparative/LoadPathwaysGeneTable.psql | 58 +++
.../comparative/LoadTranscriptPathway.psql | 174 ++++++++
.../webready/comparative/PhyleticPattern.psql | 2 +-
...up_ix.sql => ProteinSequenceGroup_ix.psql} | 0
.../comparative/TranscriptOrthologGroup.psql | 8 -
.../comparative/Undo_LoadOrthologTables.psql | 4 +
.../Undo_LoadPathwaysGeneTable.psql | 3 +
.../Undo_LoadTranscriptPathway.psql | 5 +
Model/lib/psql/webready/global/PathwayEC.psql | 31 ++
.../psql/webready/global/PathwayEC_ix.psql | 6 +
.../webready/orgSpecific/GeneAttributes.psql | 7 +-
.../orgSpecific/GeneIntJuncStats.psql | 25 --
.../orgSpecific/GeneIntJuncStats_ix.psql | 3 -
.../orgSpecific/GeneIntronJunction.psql | 403 ++++++++----------
.../orgSpecific/GeneIntronJunction_ix.psql | 21 +-
.../orgSpecific/GeneMaxIntronGIJ.psql | 47 --
.../orgSpecific/GeneMaxIntronGIJ_ix.psql | 3 -
.../webready/orgSpecific/GeneMaxJunction.psql | 69 +++
.../orgSpecific/GeneMaxJunction_ix.psql | 3 +
.../orgSpecific/GeneOrthologGroup.psql | 21 +
.../orgSpecific/GeneOrthologGroup_ix.psql | 2 +
.../webready/orgSpecific/GeneProduct.psql | 130 ++++++
.../webready/orgSpecific/GeneProduct_ix.psql | 2 +
.../orgSpecific/GenomicSeqAttributes.psql | 2 +-
.../orgSpecific/GenomicSeqJunctionStats.psql | 32 ++
.../GenomicSeqJunctionStats_ix.psql | 2 +
.../orgSpecific/IntronSupportLevel.psql | 168 +++-----
.../orgSpecific/JunctionGeneLocation.psql | 23 +
.../orgSpecific/JunctionGeneLocation_id.psql | 5 +
.../JunctionToCoverageProfileMapping.psql | 143 +++++++
.../JunctionToCoverageProfileMapping_ix.psql | 2 +
.../webready/orgSpecific/PathwayNodeGene.psql | 13 -
.../orgSpecific/PathwaysGeneTable.psql | 66 ++-
.../orgSpecific/SpliceSiteTranscript.psql | 33 ++
...e_ix.psql => SpliceSiteTranscript_ix.psql} | 0
.../orgSpecific/TranscriptAttributes.psql | 4 +-
.../webready/orgSpecific/TranscriptEC.psql | 35 ++
.../webready/orgSpecific/TranscriptEC_ix.psql | 5 +
.../orgSpecific/TranscriptOrthologGroup.psql | 23 +
.../TranscriptOrthologGroup_ix.psql | 2 +
.../orgSpecific/TranscriptPathway.psql | 115 ++---
.../psql/webready/unknown/NameMappingGIJ.psql | 118 -----
.../webready/unknown/NameMappingGIJ_ix.psql | 3 -
47 files changed, 1160 insertions(+), 716 deletions(-)
delete mode 100644 Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
delete mode 100644 Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
delete mode 100644 Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql
create mode 100644 Model/lib/psql/webready/comparative/LoadOrthologTables.psql
create mode 100644 Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql
create mode 100644 Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql
rename Model/lib/psql/webready/comparative/{ProteinSequenceGroup_ix.sql => ProteinSequenceGroup_ix.psql} (100%)
delete mode 100644 Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
create mode 100644 Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql
create mode 100644 Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql
create mode 100644 Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql
create mode 100644 Model/lib/psql/webready/global/PathwayEC.psql
create mode 100644 Model/lib/psql/webready/global/PathwayEC_ix.psql
delete mode 100644 Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql
delete mode 100644 Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql
delete mode 100644 Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql
delete mode 100644 Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/GeneProduct.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql
delete mode 100644 Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql
rename Model/lib/psql/webready/orgSpecific/{PathwayNodeGene_ix.psql => SpliceSiteTranscript_ix.psql} (100%)
create mode 100644 Model/lib/psql/webready/orgSpecific/TranscriptEC.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql
create mode 100644 Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql
delete mode 100644 Model/lib/psql/webready/unknown/NameMappingGIJ.psql
delete mode 100644 Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql
diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
deleted file mode 100644
index 1f895fde29..0000000000
--- a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql
+++ /dev/null
@@ -1,9 +0,0 @@
- create table :SCHEMA.GeneOrthologGroup as
- SELECT DISTINCT pa.gene_source_id AS gene_id,
- ogas.group_id
- FROM :SCHEMA.proteinattributes pa,
- apidb.orthologgroupaasequence ogas
- WHERE pa.aa_sequence_id = ogas.aa_sequence_id;
- alter table :SCHEMA.GeneOrthologGroup
- ADD CONSTRAINT GeneOrthologGroup_pk PRIMARY KEY (gene_id, group_id);
-
diff --git a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
deleted file mode 100644
index dcbb9370d4..0000000000
--- a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql
+++ /dev/null
@@ -1,14 +0,0 @@
-
- CREATE table :SCHEMA.GroupPhylogeneticProfile as
- SELECT rep.orthomcl_name, pp.profile_string
- FROM apidb.PhylogeneticProfile pp,
- (SELECT orthomcl_name, max(source_id) as source_id
- FROM :SCHEMA.GeneAttributes
- GROUP BY orthomcl_name) rep
- WHERE rep.source_id = pp.source_id
-
- ;
-
-
-
-
diff --git a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql
deleted file mode 100644
index 1753b0f2db..0000000000
--- a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql
+++ /dev/null
@@ -1,3 +0,0 @@
- create index group_pp_ix
- on :SCHEMA.GroupPhylogeneticProfile (orthomcl_name)
- ;
diff --git a/Model/lib/psql/webready/comparative/LoadOrthologTables.psql b/Model/lib/psql/webready/comparative/LoadOrthologTables.psql
new file mode 100644
index 0000000000..ece00e833f
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/LoadOrthologTables.psql
@@ -0,0 +1,29 @@
+/* Reload the ortholog group tables. Both are emptied first so that a rerun cannot duplicate rows. */
+TRUNCATE TABLE :SCHEMA.GeneOrthologGroup;
+TRUNCATE TABLE :SCHEMA.TranscriptOrthologGroup;
+/* Gene to ortholog group assignments, joined through the shared aa_sequence_id. */
+INSERT INTO :SCHEMA.GeneOrthologGroup (gene_id, group_id, project_id, org_abbrev, modification_date)
+SELECT ga.gene_source_id AS gene_id
+     , ogas.group_id
+     , ga.project_id
+     , ga.org_abbrev
+     , current_timestamp AS modification_date  -- was the bare word timestamp, which is a type name and not a value
+  FROM :SCHEMA.geneattributes ga
+     , apidb.orthologgroupaasequence ogas
+ WHERE ga.aa_sequence_id = ogas.aa_sequence_id;
+
+/* Transcript to ortholog group assignments, joined through the shared aa_sequence_id. */
+INSERT INTO :SCHEMA.TranscriptOrthologGroup (source_id, gene_id, group_id, project_id, org_abbrev, modification_date)
+SELECT ta.source_id AS source_id
+     , ta.gene_source_id AS gene_id
+     , ogas.group_id
+     , ta.project_id
+     , ta.org_abbrev
+     , current_timestamp AS modification_date  -- same fix as in the gene insert
+  FROM :SCHEMA.transcriptattributes ta
+     , apidb.orthologgroupaasequence ogas
+ WHERE ta.aa_sequence_id = ogas.aa_sequence_id;
+
+
+
+
diff --git a/Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql b/Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql
new file mode 100644
index 0000000000..28ffcf3b4f
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql
@@ -0,0 +1,58 @@
+/* ATTENTION: This script is run using a custom workflow step class */
+/* This accommodates the requirement to retain an empty table on undo */
+
+TRUNCATE TABLE :SCHEMA.PathwaysGeneTable;
+/* Load one organism per loop pass and commit after each pass */
+DO $$
+ DECLARE org record:PLPGSQL_DELIM
+ BEGIN
+ FOR org IN (SELECT DISTINCT abbrev FROM apidb.organism)
+ LOOP
+ INSERT INTO :SCHEMA.PathwaysGeneTable ( -- no column list, so the select order below has to match the table definition
+ SELECT t2.*, current_timestamp AS modification_date FROM (
+ SELECT DISTINCT
+ gene_source_id
+ , pathway_source_id
+ , pathway_name
+ , count(reaction_source_id) AS reactions -- counts the reaction rows that the inner query returns for each outer group
+ , enzyme
+ , expasy_url
+ , pathway_source
+ , exact_match
+ , project_id
+ , org_abbrev
+ FROM (
+ SELECT DISTINCT
+ tp.gene_source_id
+ , tp.project_id
+ , tp.pathway_source_id
+ , tp.pathway_name
+ , tp.org_abbrev
+ , pr.reaction_source_id
+ , pr.enzyme
+ , pr.expasy_url
+ , tp.pathway_source
+ , CASE MAX(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match -- Yes when any grouped TranscriptPathway row has exact_match = 1
+ FROM :SCHEMA.TranscriptPathway tp
+ , apidbtuning.PathwayAttributes pa -- NOTE(review): hard-coded apidbtuning schema here and on the next two tables, while LoadTranscriptPathway reads PathwayAttributes through the schema macro. Confirm this is intended
+ , apidbtuning.PathwayCompounds pc
+ , apidbtuning.PathwayReactions pr
+ WHERE tp.pathway_id = pa.pathway_id
+ AND pc.pathway_id = pa.pathway_id
+ AND pr.reaction_id = pc.reaction_id
+ AND pr.ext_db_name = pc.ext_db_name
+ AND tp.ec_number_pathway = pr.enzyme
+ AND tp.wildcard_count_gene <= tp.wildcard_count_pathway -- the gene EC may not have more wildcard positions than the pathway EC
+ AND pr.enzyme != '-.-.-.-' -- skip the EC number that consists of wildcards only
+ AND tp.org_abbrev = org.abbrev -- only the organism of the current loop pass
+ GROUP BY tp.gene_source_id, tp.project_id, tp.org_abbrev, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
+ ) t
+ GROUP BY gene_source_id, project_id, org_abbrev, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match
+ ) t2
+ ORDER BY pathway_source, lower(pathway_name) -- NOTE(review): a table does not keep insert order, confirm that this sort is still needed
+ ):PLPGSQL_DELIM
+ COMMIT:PLPGSQL_DELIM
+ END LOOP:PLPGSQL_DELIM
+ END:PLPGSQL_DELIM
+$$ LANGUAGE PLPGSQL;
+
diff --git a/Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql b/Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql
new file mode 100644
index 0000000000..afd718fb47
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql
@@ -0,0 +1,174 @@
+/* ATTENTION: This script is run using a custom workflow step class */
+/* This accommodates the requirement to retain empty TranscriptEC and TranscriptPathway tables on undo */
+
+/* STEP 1: Make sure temp tables have been dropped before starting */
+
+DROP TABLE IF EXISTS :SCHEMA.TranscriptEcUniverse;
+DROP TABLE IF EXISTS :SCHEMA.PathwayEcUniverse;
+DROP TABLE IF EXISTS :SCHEMA.TranscriptPathwayEcMatch;
+
+-- Empty both target tables as well, so that a rerun does not append to rows from an earlier run
+TRUNCATE TABLE :SCHEMA.TranscriptPathway;
+TRUNCATE TABLE :SCHEMA.TranscriptEC;
+
+
+/* STEP 2: Load into the TranscriptEC table. This must be done here so that we capture ortho-derived EC numbers */
+
+/* ATTENTION: this step loads into an empty partitioned table created in the orgSpecific graph */
+
+DO $$
+ DECLARE org RECORD:PLPGSQL_DELIM
+ BEGIN
+ FOR org IN (SELECT DISTINCT taxon_id, abbrev from apidb.organism) -- NOTE(review): taxon_id is selected but the loop body only reads org.abbrev
+ LOOP
+ INSERT INTO :SCHEMA.TranscriptEc ( -- no column list, so the select order below has to match the table definition
+ SELECT DISTINCT
+ ta.source_id
+ , ta.gene_source_id
+ , ec.enzyme_class_id
+ , ec.ec_number
+ , ec.ec_number_1
+ , ec.ec_number_2
+ , ec.ec_number_3
+ , ec.ec_number_4
+ , regexp_count(ec.ec_number, '-') as wildcard_count -- number of dash placeholders in the EC number
+ , asec.evidence_code
+ , ta.project_id
+ , org.abbrev as org_abbrev
+ , current_timestamp as modification_date
+ FROM sres.EnzymeClass ec
+ , dots.AaSequenceEnzymeClass asec
+ , :SCHEMA.transcriptattributes ta
+ WHERE asec.aa_sequence_id = ta.aa_sequence_id
+ AND asec.enzyme_class_id = ec.enzyme_class_id
+ AND ta.org_abbrev = org.abbrev -- only the organism of the current loop pass
+ ):PLPGSQL_DELIM
+ COMMIT:PLPGSQL_DELIM
+ END LOOP:PLPGSQL_DELIM
+ END:PLPGSQL_DELIM
+$$ LANGUAGE PLPGSQL;
+
+
+/* STEP 3: Reduce TranscriptEC to its distinct EC numbers. */
+/* The result is the "universe" of EC numbers that are attached to transcripts. */
+/* Scratch table, it is dropped again in the last step of this script. */
+
+CREATE TABLE :SCHEMA.TranscriptEcUniverse AS
+SELECT DISTINCT
+       tec.enzyme_class_id,
+       tec.ec_number,
+       tec.ec_number_1,
+       tec.ec_number_2,
+       tec.ec_number_3,
+       tec.ec_number_4,
+       tec.wildcard_count
+  FROM :SCHEMA.TranscriptEc tec
+;
+
+/* STEP 4: Reduce PathwayEC to its distinct EC numbers. */
+/* The result is the "universe" of EC numbers that are attached to pathways. */
+/* Scratch table, it is dropped again in the last step of this script. */
+
+CREATE TABLE :SCHEMA.PathwayEcUniverse AS
+SELECT DISTINCT
+       pec.enzyme_class_id,
+       pec.ec_number,
+       pec.ec_number_1,
+       pec.ec_number_2,
+       pec.ec_number_3,
+       pec.ec_number_4,
+       pec.wildcard_count
+  FROM :SCHEMA.PathwayEc pec
+;
+
+
+/* STEP 5: Match EC numbers from the transcript universe and EC numbers from the pathway universe */
+/* Both sides read the de-duplicated universe tables, so every EC number takes part in the comparison only once */
+/* Temp table, will be dropped */
+
+CREATE TABLE :SCHEMA.TranscriptPathwayEcMatch as (
+ SELECT DISTINCT
+ teu.enzyme_class_id AS transcript_enzyme_class_id
+ , peu.enzyme_class_id AS pathway_enzyme_class_id
+ , teu.wildcard_count AS wildcard_count_transcript
+ , peu.wildcard_count AS wildcard_count_pathway
+ , teu.ec_number AS ec_number_transcript
+ , peu.ec_number AS ec_number_pathway
+ FROM :SCHEMA.TranscriptEcUniverse teu
+ , :SCHEMA.PathwayEcUniverse peu -- was PathwayEc, which also carries pathway_id and therefore repeats an EC number for every pathway it occurs in
+
+ -- EC number expansion on the four separate components, a NULL component on either side acts as a wildcard (avoids slow LIKE matching)
+ WHERE (teu.ec_number_1 = peu.ec_number_1 or teu.ec_number_1 is null or peu.ec_number_1 is null)
+ AND (teu.ec_number_2 = peu.ec_number_2 or teu.ec_number_2 is null or peu.ec_number_2 is null)
+ AND (teu.ec_number_3 = peu.ec_number_3 or teu.ec_number_3 is null or peu.ec_number_3 is null)
+ AND (teu.ec_number_4 = peu.ec_number_4 or teu.ec_number_4 is null or peu.ec_number_4 is null)
+);
+
+
+/* STEP 6: Map the matched EC numbers back to both pathways and transcripts */
+/* ATTENTION: this step loads into an empty partitioned table created in the orgSpecific graph */
+
+/* This is the equivalent of the old TranscriptPathway tuning table */
+
+DO $$
+ DECLARE org RECORD:PLPGSQL_DELIM
+ BEGIN
+ FOR org IN (SELECT DISTINCT taxon_id, abbrev FROM apidb.organism) -- NOTE(review): taxon_id is selected but the loop body only reads org.abbrev
+ LOOP
+ INSERT INTO :SCHEMA.transcriptpathway ( -- no column list, so the select order below has to match the table definition
+ SELECT DISTINCT
+
+ -- gene info
+ ta.source_id
+ , ta.gene_source_id
+
+ -- pathway info
+ , pa.source_id AS pathway_source_id
+ , pa.name AS pathway_name
+ , pa.pathway_id
+ , pa.pathway_source
+ , pec.external_database_release_id
+
+ -- info about match
+ , tpem.ec_number_transcript AS ec_number_gene
+ , tpem.wildcard_count_transcript AS wildcard_count_gene
+ , tpem.ec_number_pathway
+ , tpem.wildcard_count_pathway
+ , CASE WHEN tpem.ec_number_pathway = tpem.ec_number_transcript -- 1 when both EC strings are identical
+ THEN 1
+ ELSE 0 END AS exact_match
+ , CASE WHEN tpem.wildcard_count_pathway + tpem.wildcard_count_transcript = 0 -- 1 only when neither EC number has a wildcard position
+ THEN 1
+ ELSE 0 END AS complete_ec
+
+ -- for partitioning
+ , ta.project_id
+ , org.abbrev AS org_abbrev
+ , current_timestamp AS modification_date
+ FROM :SCHEMA.TranscriptPathwayEcMatch tpem
+ , :SCHEMA.PathwayEc pec
+ , :SCHEMA.PathwayAttributes pa
+ , :SCHEMA.TranscriptAttributes ta
+ , :SCHEMA.TranscriptEc tec
+ WHERE tpem.ec_number_transcript = tec.ec_number -- matched pair back to the transcripts that carry this EC number
+ AND tpem.ec_number_pathway = pec.ec_number -- matched pair back to the pathways that carry this EC number
+ AND pa.pathway_id = pec.pathway_id
+ AND ta.source_id = tec.source_id
+ AND tec.org_abbrev = org.abbrev -- only the organism of the current loop pass
+ AND ta.org_abbrev = org.abbrev
+ -- JB: It is unclear what the intention here was. This will not remove any rows and shouldn't
+ -- AND (
+ -- (ta.orthomcl_name IS NULL AND tec.evidence_code != 'OrthoMCLDerived')
+ -- OR ta.orthomcl_name IS NOT NULL
+ -- )
+ ):PLPGSQL_DELIM
+ COMMIT:PLPGSQL_DELIM
+ END LOOP:PLPGSQL_DELIM
+ END:PLPGSQL_DELIM
+$$ LANGUAGE PLPGSQL;
+
+
+/* STEP 7: Delete temp tables */
+DROP TABLE :SCHEMA.TranscriptEcUniverse;
+DROP TABLE :SCHEMA.PathwayEcUniverse;
+DROP TABLE :SCHEMA.TranscriptPathwayEcMatch;
diff --git a/Model/lib/psql/webready/comparative/PhyleticPattern.psql b/Model/lib/psql/webready/comparative/PhyleticPattern.psql
index 6b5f20ca3c..1fad7add7c 100644
--- a/Model/lib/psql/webready/comparative/PhyleticPattern.psql
+++ b/Model/lib/psql/webready/comparative/PhyleticPattern.psql
@@ -1,6 +1,6 @@
-CREATE TABLE PhyleticPattern AS
+CREATE TABLE :SCHEMA.PhyleticPattern AS
(SELECT actual.group_name,
actual.alveolata as alveolata_actual,
total.alveolata as alveolata_total,
diff --git a/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql b/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.psql
similarity index 100%
rename from Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql
rename to Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.psql
diff --git a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
deleted file mode 100644
index fcb9f3a012..0000000000
--- a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql
+++ /dev/null
@@ -1,8 +0,0 @@
- create table :SCHEMA.TranscriptOrthologGroup as
- SELECT ta.source_id AS source_id,
- ogas.group_id
- FROM :SCHEMA.transcriptattributes ta,
- apidb.orthologgroupaasequence ogas
- WHERE ta.aa_sequence_id = ogas.aa_sequence_id;
- alter table :SCHEMA.TranscriptOrthologGroup
- add constraint TranscriptOrthologGroup_pk primary key (source_id);
diff --git a/Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql b/Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql
new file mode 100644
index 0000000000..9a50c36b62
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql
@@ -0,0 +1,4 @@
+/* This script truncates the tables loaded by LoadOrthologTables */
+/* The tables are not dropped and the partitions are retained */
+Truncate table :SCHEMA.GeneOrthologGroup;
+Truncate table :SCHEMA.TranscriptOrthologGroup;
diff --git a/Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql b/Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql
new file mode 100644
index 0000000000..6c838ced50
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql
@@ -0,0 +1,3 @@
+/* This script truncates the table loaded by LoadPathwaysGeneTable */
+/* Only the rows are removed, the table itself stays in place */
+TRUNCATE TABLE :SCHEMA.PathwaysGeneTable;
diff --git a/Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql b/Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql
new file mode 100644
index 0000000000..329c22d305
--- /dev/null
+++ b/Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql
@@ -0,0 +1,5 @@
+/* This script truncates the tables loaded by LoadTranscriptPathway */
+/* The tables are not dropped and the partitions are retained */
+
+TRUNCATE TABLE :SCHEMA.TranscriptPathway;
+TRUNCATE TABLE :SCHEMA.TranscriptEC;
diff --git a/Model/lib/psql/webready/global/PathwayEC.psql b/Model/lib/psql/webready/global/PathwayEC.psql
new file mode 100644
index 0000000000..6a688da186
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayEC.psql
@@ -0,0 +1,31 @@
+/* PathwayEC holds the universe of EC numbers that are associated with enzyme nodes in metabolic pathways. */
+/* The table is dropped and rebuilt by this script. */
+
+DROP TABLE IF EXISTS :SCHEMA.PathwayEC;
+
+CREATE TABLE :SCHEMA.PathwayEC AS
+SELECT DISTINCT
+       ec.enzyme_class_id                                   -- key for joining back to pathways later
+     , ec.ec_number                                         -- handy for quick exact matches
+     , ec.ec_number_1                                       -- the four EC components kept apart, which avoids LIKE matching later
+     , ec.ec_number_2
+     , ec.ec_number_3
+     , ec.ec_number_4
+     , regexp_count(ec.ec_number, '-') AS wildcard_count    -- how many of the EC positions are unknown
+     , pn.pathway_id
+     , p.external_database_release_id
+
+  FROM sres.PathwayNode pn
+  -- keep only the pathway nodes whose node type is the ontology term named enzyme
+  INNER JOIN sres.OntologyTerm ot
+     ON ot.ontology_term_id = pn.pathway_node_type_id
+    AND ot.name = 'enzyme'
+  INNER JOIN sres.Pathway p
+     ON p.pathway_id = pn.pathway_id
+  -- enzyme details for those nodes, linked through the row_id of the node
+  INNER JOIN sres.EnzymeClass ec
+     ON ec.enzyme_class_id = pn.row_id
+  -- leave out the root EC number, otherwise it would match everything
+ WHERE pn.display_label != '-.-.-.-'
+;
+
diff --git a/Model/lib/psql/webready/global/PathwayEC_ix.psql b/Model/lib/psql/webready/global/PathwayEC_ix.psql
new file mode 100644
index 0000000000..0c652289a8
--- /dev/null
+++ b/Model/lib/psql/webready/global/PathwayEC_ix.psql
@@ -0,0 +1,6 @@
+-- Index led by pathway_id, also carrying ec_number and external_database_release_id
+CREATE INDEX PathwayEC_1_idx
+    ON :SCHEMA.PathwayEC (pathway_id, ec_number, external_database_release_id);
+-- Index led by the four separate EC components, with the remaining EC columns appended
+CREATE INDEX PathwayEC_2_idx
+    ON :SCHEMA.PathwayEC (ec_number_1, ec_number_2, ec_number_3, ec_number_4, enzyme_class_id, ec_number, wildcard_count);
diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
index 9f8295b7cc..7ee49f52cd 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql
@@ -12,7 +12,7 @@
, sequence_id
, gene_name AS name
, COALESCE(aggregates.product, aggregates.transcript_product) as old_product
- -- , COALESCE(gp.product, 'unspecified product') as product
+ , COALESCE(gp.product, 'unspecified product') as product
, 'FIX ME' as product
, gene_type
, gene_ebi_biotype
@@ -88,9 +88,8 @@
OR d.name like '%_dbxref_uniprot_from_annotation_RSRC')
) t
GROUP BY na_feature_id
- ) uniprot ON ta.gene_na_feature_id = uniprot.na_feature_id
--- LEFT JOIN :ORG_ABBREVGeneProduct gp ON ta.gene_source_id = gp.source_id
- WHERE ta.org_abbrev = ':ORG_ABBREV'
+ ) uniprot ON ta.gene_na_feature_id = uniprot.na_feature_id and ta.org_abbrev = ':ORG_ABBREV'
+ LEFT JOIN :SCHEMA.GeneProduct gp ON ta.gene_source_id = gp.source_id and gp.org_abbrev = ':ORG_ABBREV'
ORDER BY ta.gene_source_id
:DECLARE_PARTITION;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql
deleted file mode 100644
index b8a8e7dbcf..0000000000
--- a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql
+++ /dev/null
@@ -1,25 +0,0 @@
-:CREATE_AND_POPULATE
-
-
- CREATE TABLE GeneIntJuncStats AS
- WITH org_tot AS (
- SELECT
- gs.organism,
- min(gij.total_unique) as min_annot_score, PERCENTILE_cont(0.005) within group (order by gij.total_unique asc) as perc005_annot_score,
- PERCENTILE_cont(0.01) within group (order by gij.total_unique asc) as perc01_annot_score,
- min(gij.percent_max) as min_annot_percent_max, PERCENTILE_cont(0.0001) within group (order by gij.percent_max asc) as perc0001_annot_percent_max,
- PERCENTILE_cont(0.0005) within group (order by gij.percent_max asc) as perc0005_annot_percent_max,
- floor(max(gij.segment_end - gij.segment_start) * 1.25) as max_intron_length
- FROM geneintronjunction gij, genomicseqattributes gs
- WHERE gs.na_sequence_id = gij.na_sequence_id
- AND gij.annotated_intron = 'Yes'
- GROUP BY gs.organism
- )
- SELECT gs.na_sequence_id, gs.source_id, ot.*
- FROM genomicseqattributes gs, org_tot ot
- WHERE gs.organism = ot.organism
- AND gs.na_sequence_id in (SELECT DISTINCT na_sequence_id FROM apidb.intronjunction)
-
-
-:DECLARE_PARTITION;
-
diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql
deleted file mode 100644
index 89d3004344..0000000000
--- a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql
+++ /dev/null
@@ -1,3 +0,0 @@
- create index GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id)
- ;
-
diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql
index 9cf983cd01..39837511c8 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql
@@ -1,223 +1,192 @@
-
-
- CREATE UNLOGGED TABLE PanIOgij AS
- SELECT i.protocol_app_id, i.protocol_app_node_id as input_pan_id, o.protocol_app_node_id as output_pan_id
- FROM study.output o, study.input i
- WHERE
- o.protocol_app_node_id in (
- SELECT DISTINCT protocol_app_node_id
- FROM study.protocolappnode where name like '%junctions%' or name like '%htseq%'
- )
- AND o.protocol_app_id = i.protocol_app_id
-
- ;
-
-
-
- CREATE UNLOGGED TABLE annotgij (
- na_sequence_id,
- start_min,
- end_max,
- is_reversed,
- feature_type
- ) AS
- SELECT il.na_sequence_id, il.start_min,il.end_max, il.is_reversed, 'Intron' as feature_type
- FROM apidb.IntronLocation il
- GROUP by il.na_sequence_id, il.start_min,il.end_max,il.is_reversed
-
- ;
-
-
-
- CREATE UNIQUE INDEX :ORG_ABBREV_annottmpnew_pk_ix ON annotgij (na_sequence_id,start_min,end_max,is_reversed,feature_type)
-
-
- ;
-
-
-
- create table GeneIdLocGIJ (
- na_sequence_id NUMERIC(10),
- start_min NUMERIC,
- is_reversed NUMERIC,
- end_max NUMERIC,
- na_feature_id NUMERIC(10),
- source_id varchar(100),
- total_expression NUMERIC
- )
-
- ;
-
-
-
- CREATE UNIQUE INDEX :ORG_ABBREV_gnattidloc_pk_ix ON GeneIdLocGIJ (na_sequence_id,start_min,is_reversed,end_max,na_feature_id,source_id,total_expression)
-
-
- ;
-
-
-
- DO $$
- DECLARE
- idlist RECORD;
- BEGIN
- FOR idlist IN ( SELECT DISTINCT na_sequence_id FROM apidb.intronjunction)
- LOOP
- INSERT INTO GeneIdLocGIJ (
- SELECT gf.na_sequence_id,l.start_min,l.is_reversed,l.end_max,gf.na_feature_id,gf.source_id,
- round(sum(nafe.value)::NUMERIC,2) as total_expression
- FROM dots.genefeature gf, dots.nalocation l, namemappinggij je, results.nafeatureexpression nafe
- WHERE gf.na_sequence_id = idlist.na_sequence_id
- AND l.na_feature_id = gf.na_feature_id
- AND gf.na_feature_id = nafe.na_feature_id
- AND nafe.protocol_app_node_id = je.exp_pan_id
- GROUP BY gf.na_sequence_id,l.start_min,l.is_reversed,l.end_max,gf.na_feature_id,gf.source_id
- );
- commit;
- END LOOP;
- END;
- $$ LANGUAGE PLPGSQL;
-
- ;
-
-
-
- create index :ORG_ABBREV_gnidloc_nafid_ix on GeneIdLocGIJ (na_feature_id)
+/* Remove both scratch tables if they already exist. */
+DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp;
+DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVGIJtmp;
+/*
+  Scratch table with every annotated intron location of this taxon exactly once,
+  each row tagged with the constant feature_type Intron.
+*/
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp (
+    na_sequence_id
+  , start_min
+  , end_max
+  , is_reversed
+  , feature_type
+) AS
+SELECT il.na_sequence_id
+     , il.start_min
+     , il.end_max
+     , il.is_reversed
+     , 'Intron' AS feature_type
+  FROM apidb.IntronLocation il
+  INNER JOIN dots.nasequence s
+     ON s.na_sequence_id = il.na_sequence_id
+ WHERE s.taxon_id = :TAXON_ID
+ -- grouping on every location column removes duplicate intron rows
+ GROUP BY il.na_sequence_id
+        , il.start_min
+        , il.end_max
+        , il.is_reversed
+;
+
+CREATE UNIQUE INDEX :ORG_ABBREV_annottmpnew_pk_ix ON :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp (na_sequence_id,start_min,end_max,is_reversed,feature_type)
;
-
-
- create UNLOGGED table GIJtmp (
- NA_SEQUENCE_ID NUMERIC(10),
- SEQUENCE_SOURCE_ID VARCHAR(100),
- SEGMENT_START NUMERIC,
- SEGMENT_END NUMERIC,
- TOTAL_UNIQUE NUMERIC,
- TOTAL_ISRPM NUMERIC,
- IS_REVERSED NUMERIC(1),
- INTRON_FEATURE_ID VARCHAR(200),
- MATCHES_GENE_STRAND NUMERIC,
- GENE_SOURCE_ID VARCHAR(100),
- GENE_NA_FEATURE_ID NUMERIC,
- ANNOTATED_INTRON VARCHAR(10)
+/*
+  Scratch table filled by the junction loop further down. The final GeneIntronJunction select reads from it.
+*/
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGIJtmp (
+    na_sequence_id      numeric(10)
+  , sequence_source_id  varchar(100)
+  , segment_start       numeric
+  , segment_end         numeric
+  , total_unique        numeric
+  , total_isrpm         numeric
+  , is_reversed         numeric(1)
+  , intron_feature_id   varchar(200)
+  , matches_gene_strand numeric
+  , gene_source_id      varchar(100)
+  , gene_na_feature_id  numeric
+  , annotated_intron    varchar(10)
+)
+;
+
+
+DO $$
+  DECLARE  -- the three position counters are starting values only, they are reassigned for every sequence inside the loop
+    iter_length numeric := 4999 :PLPGSQL_DELIM
+    i_first_pos numeric := 1 :PLPGSQL_DELIM
+    i_last_pos  numeric := i_first_pos + iter_length :PLPGSQL_DELIM
+    idlist      RECORD :PLPGSQL_DELIM
+  BEGIN
+    FOR idlist IN (  -- one row per sequence of this taxon that has at least one intron junction row
+      SELECT na_sequence_id
+           , source_id as sequence_source_id
+           , length
+           , taxon_id
+           , CASE WHEN step_mult > 500000 THEN 500000 ELSE step_mult END as seq_step_mult  -- window size, capped at 500000
+        FROM (
+          SELECT gs.na_sequence_id
+               , gs.source_id
+               , gs.length
+               , gs.taxon_id
+               , 25000 * (1 + floor(gs.length/count(*))) as step_mult  -- grows with the sequence length per junction row
+            FROM apidb.intronjunction ij
+               , dots.nasequence gs
+           WHERE gs.na_sequence_id = ij.na_sequence_id
+             AND gs.taxon_id = :TAXON_ID
+           GROUP BY gs.na_sequence_id, gs.source_id, gs.length, gs.taxon_id  -- source_id added so that every selected plain column is grouped
)
-
- ;
-
-
-
- DO $$
- DECLARE
- iter_length numeric := 4999;
- i_first_pos numeric := 1;
- i_last_pos numeric := i_first_pos + iter_length;
- idlist RECORD;
- BEGIN
- FOR idlist IN (
- SELECT na_sequence_id, length, taxon_id, CASE WHEN step_mult > 500000 THEN 500000 ELSE step_mult END as seq_step_mult
- FROM (
- SELECT gs.na_sequence_id, gs.length, gs.taxon_id, 25000 * (1 + floor(gs.length/count(*))) as step_mult
- FROM apidb.intronjunction ij, dots.nasequence gs
- WHERE gs.na_sequence_id = ij.na_sequence_id
- GROUP BY gs.na_sequence_id, gs.length, gs.taxon_id
- ) t
- ORDER BY taxon_id
- )
- LOOP
- iter_length := idlist.seq_step_mult;
- i_first_pos := 1;
- i_last_pos := i_first_pos + iter_length;
- WHILE i_first_pos < idlist.length
- LOOP
- INSERT INTO GIJtmp
- SELECT DISTINCT
- junc.*,
- CASE
- WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed
- THEN 1
- ELSE 0
- END as matches_gene_strand,
- last_value(ga.source_id) over w1 as gene_source_id,
- last_value(ga.na_feature_id) over w1 as gene_na_feature_id,
- CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron
- FROM (
- SELECT ij.na_sequence_id,seq.source_id as sequence_source_id,ij.segment_start,ij.segment_end,
- sum(ij.unique_reads) as total_unique, round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm,
- ij.is_reversed,seq.source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id
- FROM apidb.intronjunction ij, namemappinggij je, dots.nasequence seq
- WHERE ij.na_sequence_id = idlist.na_sequence_id
- AND ij.segment_start between i_first_pos and i_last_pos
- AND ij.na_sequence_id = seq.na_sequence_id
- AND ij.unique_reads >= 1
- AND je.junctions_pan_id = ij.protocol_app_node_id
- AND je.multiplier < 20
- GROUP BY ij.na_sequence_id,ij.segment_start,ij.segment_end, ij.is_reversed,seq.source_id
- ) junc
- LEFT JOIN GeneIdLocGIJ ga ON
- junc.na_sequence_id = ga.na_sequence_id
- AND junc.segment_start >= ga.start_min
- AND junc.segment_end <= ga.end_max
- AND junc.is_reversed = ga.is_reversed
- LEFT JOIN annotgij ag ON
- junc.na_sequence_id = ag.na_sequence_id
- AND junc.segment_start = ag.start_min
- AND junc.segment_end = ag.end_max
- AND junc.is_reversed = ag.is_reversed
- WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron')
- WINDOW w1 AS (
- PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type
- ORDER BY ga.total_expression ASC
- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
- )
- ;
- commit;
- i_first_pos := i_last_pos + 1;
- i_last_pos := i_first_pos + iter_length;
- END LOOP;
- END LOOP;
- END;
- $$ LANGUAGE PLPGSQL;
-
- ;
-
-
-
- create index :ORG_ABBREV_gijtmp_gnscid_ix on gijtmp (gene_source_id)
-
- ;
+        AS t /* the derived table above needs an alias on PostgreSQL releases before 16 */ )
+    LOOP  -- one pass per sequence returned by the query above
+      iter_length := idlist.seq_step_mult :PLPGSQL_DELIM
+      i_first_pos := 1 :PLPGSQL_DELIM
+      i_last_pos := i_first_pos + iter_length :PLPGSQL_DELIM
+      WHILE i_first_pos < idlist.length  -- walk along the sequence one position window at a time
+      LOOP
+        INSERT INTO :SCHEMA.:ORG_ABBREVGIJtmp
+        SELECT DISTINCT  -- the window values are the same on every candidate gene row, DISTINCT folds them into one row per junction
+          junc.*,
+          CASE
+            WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed
+            THEN 1
+            ELSE 0
+          END as matches_gene_strand,
+          last_value(ga.source_id) over w1 as gene_source_id,  -- w1 is ordered by total_expression ascending, the gene sorted last is taken
+          last_value(ga.na_feature_id) over w1 as gene_na_feature_id,
+          CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron
+        FROM (
+          SELECT ij.na_sequence_id
+               , idlist.sequence_source_id
+               , ij.segment_start
+               , ij.segment_end
+               , sum(ij.unique_reads) as total_unique
+               , round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm
+               , ij.is_reversed
+               , idlist.sequence_source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id
+            FROM apidb.intronjunction ij
+               , :SCHEMA.JunctionToCoverageProfileMapping je
+           WHERE ij.na_sequence_id = idlist.na_sequence_id
+             AND ij.segment_start between i_first_pos and i_last_pos  -- junctions that start inside the current window
+             AND ij.unique_reads >= 1
+             AND je.junctions_pan_id = ij.protocol_app_node_id
+             AND je.multiplier < 20  -- NOTE(review): the meaning of the threshold 20 is not visible in this script
+             AND je.org_abbrev = ':ORG_ABBREV'
+           GROUP BY ij.na_sequence_id
+               , ij.segment_start
+               , ij.segment_end
+               , ij.is_reversed
+               , idlist.sequence_source_id
+        ) junc
+        LEFT JOIN :SCHEMA.JunctionGeneLocation ga  -- genes on the same strand whose span contains the whole junction
+          ON junc.na_sequence_id = ga.na_sequence_id
+          AND ga.org_abbrev = ':ORG_ABBREV'
+          AND junc.segment_start >= ga.start_min
+          AND junc.segment_end <= ga.end_max
+          AND junc.is_reversed = ga.is_reversed
+        LEFT JOIN :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp ag  -- annotated intron with exactly the same coordinates and strand
+          ON junc.na_sequence_id = ag.na_sequence_id
+          AND junc.segment_start = ag.start_min
+          AND junc.segment_end = ag.end_max
+          AND junc.is_reversed = ag.is_reversed
+        WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron')
+        WINDOW w1 AS (
+          PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type
+          ORDER BY ga.total_expression ASC
+          ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
+        )
+        :PLPGSQL_DELIM
+        commit :PLPGSQL_DELIM
+        i_first_pos := i_last_pos + 1 :PLPGSQL_DELIM
+        i_last_pos := i_first_pos + iter_length :PLPGSQL_DELIM
+      END LOOP :PLPGSQL_DELIM
+    END LOOP :PLPGSQL_DELIM
+  END :PLPGSQL_DELIM
+$$ LANGUAGE PLPGSQL :PLPGSQL_DELIM
+
+;
+
+
+create index :ORG_ABBREV_gijtmp_gnscid_ix on :SCHEMA.:ORG_ABBREVGIJtmp (gene_source_id)
+
+ ;
:CREATE_AND_POPULATE
-
-
- CREATE TABLE GeneIntronJunction AS
- SELECT
- junc.*, CASE WHEN maxv.gene_source_id is not null and maxv.max_isrpm > 0 THEN round((junc.total_isrpm / maxv.max_isrpm) * 100,2) ELSE null END as percent_max,
- CASE WHEN maxv.gene_source_id is not null THEN 1 ELSE 0 END as contained,
- CAST (null as numeric(10)) as taxon_id,
- cast (null as numeric(10)) as upstream_gene_id,
- cast (null as numeric) as upstream_distance,
- cast (null as numeric(10)) as downstream_gene_id,
- cast (null as numeric) as downstream_distance
- FROM
- gijtmp junc LEFT JOIN
- (
- SELECT gene_source_id,max(total_unique) as max_unique, max(total_isrpm) as max_isrpm
- FROM gijtmp
- WHERE gene_source_id is not null
- GROUP BY gene_source_id
- ) maxv ON junc.gene_source_id = maxv.gene_source_id
-
-
-:DECLARE_PARTITION;
-
-
-
- UPDATE GeneIntronJunction gij
- SET taxon_id
- = (SELECT taxon_id
- FROM dots.NaSequence
- WHERE na_sequence_id = gij.na_sequence_id)
-
- ;
-
+ SELECT
+ junc.*
+ , CASE -- total_isrpm of this junction as a percentage of the highest total_isrpm among the junctions of the same gene
+ WHEN maxv.gene_source_id is not null and maxv.max_isrpm > 0
+ THEN round((junc.total_isrpm / maxv.max_isrpm) * 100,2)
+ ELSE null
+ END as percent_max
+ , CASE -- 1 when the junction row has a gene assigned, 0 otherwise
+ WHEN maxv.gene_source_id is not null
+ THEN 1
+ ELSE 0
+ END as contained
+ , :TAXON_ID as taxon_id
+-- , CAST (null as numeric(10)) as upstream_gene_id
+-- , CAST (null as numeric) as upstream_distance
+-- , CAST (null as numeric(10)) as downstream_gene_id
+-- , CAST (null as numeric) as downstream_distance
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ FROM
+ :SCHEMA.:ORG_ABBREVGIJtmp junc LEFT JOIN
+ (
+ SELECT gene_source_id,max(total_unique) as max_unique, max(total_isrpm) as max_isrpm -- NOTE(review): max_unique is computed but not read by the outer select
+ FROM :SCHEMA.:ORG_ABBREVGIJtmp
+ WHERE gene_source_id is not null
+ GROUP BY gene_source_id
+ ) maxv ON junc.gene_source_id = maxv.gene_source_id
+:DECLARE_PARTITION
+ ;
+
+
+/* The scratch tables are no longer needed once the select above has run */
+drop table :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp
+ ;
+
+drop table :SCHEMA.:ORG_ABBREVGIJtmp
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql
index ad0f7593c5..22bd603691 100644
--- a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql
+++ b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql
@@ -1,13 +1,12 @@
- create index gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
- ;
-
- create index gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id)
- ;
+create index gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed)
+ ;
- create index gijnew_txnloc_ix
- on :SCHEMA.GeneIntronJunction
- (taxon_id, na_sequence_id, segment_start, segment_end, is_reversed,
- total_unique, total_isrpm, annotated_intron)
-
- ;
+create index gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id)
+ ;
+create index gijnew_txnloc_ix
+ on :SCHEMA.GeneIntronJunction
+ (taxon_id, na_sequence_id, segment_start, segment_end, is_reversed,
+ total_unique, total_isrpm, annotated_intron)
+
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql
deleted file mode 100644
index 0025d7d161..0000000000
--- a/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql
+++ /dev/null
@@ -1,47 +0,0 @@
-:CREATE_AND_POPULATE
-
-
- CREATE TABLE GeneMaxIntronGIJ (
- protocol_app_node_id NUMERIC(10),
- gene_source_id VARCHAR(200),
- max_unique NUMERIC,
- max_isrpm NUMERIC,
- sum_unique NUMERIC,
- sum_isrpm NUMERIC,
- avg_unique NUMERIC,
- avg_isrpm NUMERIC
- )
-
-
-:DECLARE_PARTITION;
-
-
-
- DO $$
- DECLARE
- idlist RECORD;
- BEGIN
- FOR idlist IN (
- SELECT DISTINCT na_sequence_id
- FROM GeneIdLocGIJ
- )
- LOOP
- INSERT INTO GeneMaxIntronGIJ (
- SELECT j.protocol_app_node_id, ga.source_id, max(unique_reads) as max_unique, max(round(j.unique_reads * mult.multiplier,2)) as max_isrpm,
- sum(unique_reads) as sum_unique, sum(round(j.unique_reads * mult.multiplier,2)) as sum_isrpm, avg(unique_reads) as avg_unique, avg(round(j.unique_reads * mult.multiplier,2)) as avg_isrpm
- FROM apidb.intronjunction j, GeneIdLocGIJ ga, namemappinggij mult
- WHERE ga.na_sequence_id = idlist.na_sequence_id
- AND ga.na_sequence_id = j.na_sequence_id
- AND ga.start_min <= j.segment_start
- AND ga.end_max >= j.segment_end
- AND ga.is_reversed = j.is_reversed
- AND j.protocol_app_node_id = mult.junctions_pan_id
- GROUP BY j.protocol_app_node_id, ga.source_id
- );
- commit;
- END LOOP;
- END;
- $$ LANGUAGE PLPGSQL;
-
- ;
-
diff --git a/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql
deleted file mode 100644
index 9bd71417ae..0000000000
--- a/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql
+++ /dev/null
@@ -1,3 +0,0 @@
- CREATE INDEX GnMxIntGIJ_ix on :SCHEMA.GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id)
- ;
-
diff --git a/Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql
new file mode 100644
index 0000000000..593fbbaee4
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql
@@ -0,0 +1,69 @@
+drop table if exists :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp
+ ;
+-- Staging table holding junction read statistics per gene and protocol_app_node_id for one organism
+create unlogged table :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp (
+ protocol_app_node_id NUMERIC(10),
+ gene_source_id VARCHAR(200),
+ max_unique NUMERIC,
+ max_isrpm NUMERIC,
+ sum_unique NUMERIC,
+ sum_isrpm NUMERIC,
+ avg_unique NUMERIC,
+ avg_isrpm NUMERIC,
+ project_id VARCHAR(20),
+ org_abbrev VARCHAR(20),
+ modification_date TIMESTAMP
+ )
+;
+
+-- Fill the staging table one genomic sequence at a time, committing after each sequence
+DO $$
+ DECLARE
+ idlist RECORD :PLPGSQL_DELIM
+ BEGIN
+ FOR idlist IN (
+ SELECT DISTINCT na_sequence_id
+ FROM :SCHEMA.JunctionGeneLocation
+ WHERE org_abbrev = ':ORG_ABBREV'
+ )
+ LOOP
+ INSERT INTO :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp (
+ SELECT j.protocol_app_node_id
+ , ga.source_id
+ , max(unique_reads) as max_unique
+ , max(round(j.unique_reads * mult.multiplier,2)) as max_isrpm -- isrpm columns are unique reads scaled by the multiplier of the matching junctions_pan_id
+ , sum(unique_reads) as sum_unique
+ , sum(round(j.unique_reads * mult.multiplier,2)) as sum_isrpm
+ , avg(unique_reads) as avg_unique
+ , avg(round(j.unique_reads * mult.multiplier,2)) as avg_isrpm
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ FROM apidb.intronjunction j
+ , :SCHEMA.JunctionGeneLocation ga
+ , :SCHEMA.JunctionToCoverageProfileMapping mult
+ WHERE ga.na_sequence_id = idlist.na_sequence_id -- only the sequence handled in this iteration
+ AND ga.na_sequence_id = j.na_sequence_id
+ AND ga.start_min <= j.segment_start -- junction starts at or after the gene start
+ AND ga.end_max >= j.segment_end -- and ends at or before the gene end
+ AND ga.is_reversed = j.is_reversed -- gene and junction carry the same is_reversed flag
+ AND j.protocol_app_node_id = mult.junctions_pan_id
+ AND ga.org_abbrev = ':ORG_ABBREV'
+ AND mult.org_abbrev = ':ORG_ABBREV'
+ GROUP BY j.protocol_app_node_id, ga.source_id
+ ) :PLPGSQL_DELIM
+ commit :PLPGSQL_DELIM
+ END LOOP :PLPGSQL_DELIM
+ END :PLPGSQL_DELIM
+$$ LANGUAGE PLPGSQL :PLPGSQL_DELIM
+ ;
+
+-- Copy the staged rows into the final table (presumably GeneMaxJunction, as named in the _ix script - confirm against the macro definitions)
+
+:CREATE_AND_POPULATE
+select * from :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp
+:DECLARE_PARTITION;
+
+-- The staging table is no longer needed once its rows are copied
+drop table :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql
new file mode 100644
index 0000000000..8915a4a6ea
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql
@@ -0,0 +1,3 @@
+-- Lookup index by gene and then by protocol_app_node_id
+create index GnMxIntGIJ_ix on :SCHEMA.GeneMaxJunction (gene_source_id, protocol_app_node_id)
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql
new file mode 100644
index 0000000000..0876f0b3a0
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql
@@ -0,0 +1,21 @@
+drop table if exists :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp;
+/*
+
+ATTENTION: This table is intentionally empty. It is populated later, in the comparative graph.
+We create it here so that it is partitioned like the other webready tables.
+*/
+
+create unlogged table :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp (
+ gene_id varchar(80),
+ group_id varchar(16),
+ project_id varchar(20),
+ org_abbrev varchar(20),
+ modification_date timestamp
+);
+
+:CREATE_AND_POPULATE
+select * from :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp
+:DECLARE_PARTITION;
+
+
+drop table :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql
new file mode 100644
index 0000000000..c7ce2fd90a
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql
@@ -0,0 +1,2 @@
+alter table :SCHEMA.GeneOrthologGroup -- NOTE(review) - if this table is partitioned on org_abbrev, PostgreSQL requires the partition key inside any primary key - confirm
+ ADD CONSTRAINT GeneOrthologGroup_pk PRIMARY KEY (gene_id, group_id);
diff --git a/Model/lib/psql/webready/orgSpecific/GeneProduct.psql b/Model/lib/psql/webready/orgSpecific/GeneProduct.psql
new file mode 100644
index 0000000000..ed8670eda5
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneProduct.psql
@@ -0,0 +1,130 @@
+drop table if exists :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp;
+create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp as -- gene features of this taxon with the product stored on the gene feature itself
+(select gf.na_feature_id
+ , gf.source_id
+ , gf.product
+ from dots.genefeature gf
+ inner join dots.nasequence nas
+ on gf.na_sequence_id = nas.na_sequence_id
+ where nas.taxon_id = :TAXON_ID
+)
+;
+
+
+:CREATE_AND_POPULATE
+ with gfp_preferred -- rule 1 - distinct preferred products attached to the gene feature, joined into one string per gene
+ as (select source_id,
+ substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product,
+ count(*) as value_count
+ from (select distinct gf.source_id, gfp.product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, apidb.GeneFeatureProduct gfp
+ where gfp.na_feature_id = gf.na_feature_id
+ and gfp.is_preferred = 1) tmp
+ group by source_id),
+ gfp_any -- rule 2 - same as above without the is_preferred filter
+ as (select source_id,
+ substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product,
+ count(*) as value_count
+ from (select distinct gf.source_id, gfp.product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, apidb.GeneFeatureProduct gfp
+ where gfp.na_feature_id = gf.na_feature_id) tmp
+ group by source_id),
+ tp_preferred -- rule 3 - distinct preferred products of the gene's transcripts
+ as (select source_id,
+ substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product,
+ count(*) as value_count
+ from (select distinct gf.source_id, tp.product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, dots.Transcript t, apidb.TranscriptProduct tp
+ where t.parent_id = gf.na_feature_id
+ and tp.na_feature_id = t.na_feature_id
+ and tp.is_preferred = 1) tmp
+ group by source_id),
+ gf_product -- genes whose own product column is populated
+ as (select source_id, product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf
+ where product is not null),
+ tp_any -- rule 5 - distinct transcript products regardless of is_preferred
+ as (select source_id,
+ substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product,
+ count(*) as value_count
+ from (select distinct gf.source_id, tp.product
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, dots.Transcript t, apidb.TranscriptProduct tp
+ where t.parent_id = gf.na_feature_id
+ and tp.na_feature_id = t.na_feature_id) tmp
+ group by source_id),
+ t_product -- rule 6 - product column of dots.Transcript itself
+ as (select source_id,
+ substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product,
+ count(*) as value_count
+ from (select gf.source_id, t.product -- NOTE(review) - no DISTINCT here, unlike the other CTEs, so repeated products are repeated in the string and counted
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, dots.Transcript t
+ where t.parent_id = gf.na_feature_id
+ and t.product is not null) tmp
+ group by source_id)
+ select gf.source_id,
+ coalesce(gfp_preferred.product, gfp_any.product, tp_preferred.product, -- first non-null source wins, in rule order 1 to 6
+ gf.product, tp_any.product, t_product.product)
+ as product,
+ case -- number of distinct values behind the chosen product, following the same precedence
+ when gfp_preferred.product is not null
+ then gfp_preferred.value_count
+ when gfp_any.product is not null
+ then gfp_any.value_count
+ when tp_preferred.product is not null
+ then tp_preferred.value_count
+ when gf.product is not null
+ then 1
+ when tp_any.product is not null
+ then tp_any.value_count
+ when t_product.product is not null
+ then t_product.value_count
+ else 0 -- 'unspecified product'
+ end
+ as value_count,
+ case -- which rule (1 to 6) supplied the product, or 7 when none did
+ when gfp_preferred.product is not null
+ then 1
+ when gfp_any.product is not null
+ then 2
+ when tp_preferred.product is not null
+ then 3
+ when gf.product is not null
+ then 4
+ when tp_any.product is not null
+ then 5
+ when t_product.product is not null
+ then 6
+ else 7
+ end
+ as source_rule,
+ ':PROJECT_ID' as project_id,
+ ':ORG_ABBREV' as org_abbrev,
+ current_timestamp as modification_date
+ from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf
+ LEFT JOIN gfp_preferred ON gf.source_id = gfp_preferred.source_id
+ LEFT JOIN gfp_any ON gf.source_id = gfp_any.source_id
+ LEFT JOIN tp_preferred ON gf.source_id = tp_preferred.source_id
+ LEFT JOIN gf_product ON gf.source_id = gf_product.source_id -- NOTE(review) - no gf_product column is selected above, rule 4 reads gf.product directly
+ LEFT JOIN tp_any ON gf.source_id = tp_any.source_id
+ LEFT JOIN t_product ON gf.source_id = t_product.source_id
+:DECLARE_PARTITION;
+
+
+/*
+
+
+Dependent Step
+ gene/protein_id -> uniprot accession and product
+
+This step will assume that all UniProt accessions are already assigned to the protein, and that the UniProt product name and the Pfam domains are already available.
+
+
+
+
+
+*/
+
+delete from :SCHEMA.GeneProduct where product is null and org_abbrev = ':ORG_ABBREV';
+
+drop table :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp
+;
diff --git a/Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql
new file mode 100644
index 0000000000..910d965521
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql
@@ -0,0 +1,2 @@
+ CREATE INDEX GeneProduct_gene_idx ON :SCHEMA.GeneProduct (source_id, product) -- NOTE(review) - product is built with substr(..., 1, 4000) and may exceed the btree index row size limit - confirm
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
index 09f86475ac..e8f4481074 100644
--- a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql
@@ -63,7 +63,7 @@
WHERE
sequence.taxon_id = taxon.taxon_id
AND sequence.sequence_ontology_id = so.ontology_term_id
- AND (sequence.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0)
+ AND sequence.taxon_id = :TAXON_ID
AND so.name IN ('random_sequence', 'chromosome', 'contig', 'supercontig','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle')
ORDER BY organism, source_id
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql
new file mode 100644
index 0000000000..b2a85443db
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql
@@ -0,0 +1,32 @@
+:CREATE_AND_POPULATE
+ with org_tot as (
+ -- per-organism thresholds computed from annotated junctions only
+ select seq.organism
+ , min(junc.total_unique) as min_annot_score
+ , percentile_cont(0.005) within group (order by junc.total_unique) as perc005_annot_score
+ , percentile_cont(0.01) within group (order by junc.total_unique) as perc01_annot_score
+ , min(junc.percent_max) as min_annot_percent_max
+ , percentile_cont(0.0001) within group (order by junc.percent_max) as perc0001_annot_percent_max
+ , percentile_cont(0.0005) within group (order by junc.percent_max) as perc0005_annot_percent_max
+ , floor(max(junc.segment_end - junc.segment_start) * 1.25) as max_intron_length
+ from :SCHEMA.geneintronjunction junc
+ inner join :SCHEMA.genomicseqattributes seq
+ on seq.na_sequence_id = junc.na_sequence_id
+ where junc.annotated_intron = 'Yes'
+ and junc.org_abbrev = ':ORG_ABBREV'
+ and seq.org_abbrev = ':ORG_ABBREV'
+ group by seq.organism
+ )
+ select seq.na_sequence_id
+ , seq.source_id
+ , org_tot.*
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ from :SCHEMA.genomicseqattributes seq
+ inner join org_tot
+ on seq.organism = org_tot.organism
+ where seq.na_sequence_id in (select ij.na_sequence_id from apidb.intronjunction ij inner join dots.nasequence s on ij.na_sequence_id = s.na_sequence_id where s.taxon_id = :TAXON_ID)
+ and seq.org_abbrev = ':ORG_ABBREV'
+:DECLARE_PARTITION
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql
new file mode 100644
index 0000000000..73401d0fe4
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql
@@ -0,0 +1,2 @@
+-- One-column lookup index on the sequence id
+CREATE INDEX GeneIntJuncStat_ix ON :SCHEMA.GenomicSeqJunctionStats (na_sequence_id);
diff --git a/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql
index f9895a117e..1e8d386532 100644
--- a/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql
+++ b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql
@@ -1,119 +1,53 @@
:CREATE_AND_POPULATE
-
-
- create table :ORG_ABBREVIntronSupportLevel as
- SELECT * FROM (
- SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-high') as string_value
- FROM (
- SELECT gene_source_id
- , ontology_term
- , case when count(*) = intron_count THEN 'All'
- when count(*) = 0 THEN 'None'
- else 'Any' end as string_value
- FROM (
- SELECT gij.gene_source_id
- , 'intron_junction' as ontology_term
- , intronCount.intron_count
- FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
- , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
- WHERE gij.gene_source_id = intronCount.source_id
- and gij.na_sequence_id = stats.na_sequence_id
- and gij.annotated_intron = 'Yes'
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= 2 /*stats.perc0005_annot_percent_max*/)
- ) t
- GROUP BY gene_source_id, ontology_term, intron_count
- ) t
- WHERE string_value = 'All'
-
- UNION
-
- SELECT gene_source_id
- , ontology_term
- , case when count(*) = intron_count THEN 'All-high'
- when count(*) = 0 THEN 'None'
- else 'Any-high' end as string_value
- FROM (
- SELECT gij.gene_source_id
- , 'intron_junction' as ontology_term
- , intronCount.intron_count
- FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
- , (SELECT count (*) as intron_count, source_id FROM apidbtuning.genemodeldump WHERE type = 'Intron' GROUP BY source_id) intronCount
- WHERE gij.gene_source_id = intronCount.source_id
- AND gij.na_sequence_id = stats.na_sequence_id
- AND gij.annotated_intron = 'Yes'
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= 2)
- ) t2
- GROUP BY gene_source_id, ontology_term, intron_count
-
- UNION
-
- SELECT gene_source_id
- , ontology_term
- , CASE WHEN count(*) = intron_count THEN 'All-low'
- WHEN count(*) = 0 THEN 'None'
- ELSE 'Any-low' END as string_value
- FROM (
- SELECT gij.gene_source_id
- , 'intron_junction' as ontology_term
- , intronCount.intron_count
- FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
- , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
- WHERE gij.gene_source_id = intronCount.source_id
- AND gij.na_sequence_id = stats.na_sequence_id
- AND gij.annotated_intron = 'Yes'
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max)
- AND gij.intron_feature_id not in (
- SELECT gij.intron_feature_id
- FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats
- WHERE gij.na_sequence_id = stats.na_sequence_id
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= 2)
- )
- ) t3
- GROUP BY gene_source_id, ontology_term, intron_count
-
- UNION
-
- SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-low') as string_value
- FROM (
- SELECT gene_source_id
- , ontology_term
- , case when count(*) = intron_count THEN 'All'
- when count(*) = 0 THEN 'None'
- else 'Any' end as string_value
- FROM (
- SELECT gij.gene_source_id
- , 'intron_junction' as ontology_term
- , intronCount.intron_count
- FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
- , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
- WHERE gij.gene_source_id = intronCount.source_id
- AND gij.na_sequence_id = stats.na_sequence_id
- AND gij.annotated_intron = 'Yes'
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max)
- AND gij.intron_feature_id not in (
- SELECT gij.intron_feature_id
- FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats
- WHERE gij.na_sequence_id = stats.na_sequence_id
- AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
- AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
- AND (gij.contained = 0 or gij.percent_max >= 2)
- )
- ) t
- GROUP BY gene_source_id, ontology_term, intron_count
- ) t4
- WHERE string_value = 'All'
- ) t
-
-
+with annotatedJunctions AS ( -- number of distinct annotated intron locations per gene
+ select gene_source_id
+ , count(*) as annotated_count
+ from (
+ select distinct ta.gene_source_id
+ , il.na_sequence_id
+ , il.start_min
+ , il.end_max
+ , il.is_reversed
+ from apidb.intronlocation il
+ inner join :SCHEMA.transcriptattributes ta
+ ON il.parent_id = ta.na_feature_id and ta.org_abbrev = ':ORG_ABBREV' -- only this organism's transcripts
+ ) distinct_introns group by gene_source_id -- a derived table needs an alias before PostgreSQL 16
+), exptJunctions as ( -- annotated junctions of this organism, each classed as High or Low confidence
+ select gij.gene_source_id
+ , case when gij.total_unique >= stats.perc01_annot_score and gij.percent_max >= 2
+ then 'High'
+ else 'Low'
+ end as conf
+ FROM :SCHEMA.GeneIntronJunction gij
+ JOIN :SCHEMA.GenomicSeqJunctionStats stats
+ ON gij.na_sequence_id = stats.na_sequence_id
+ where gij.annotated_intron = 'Yes' -- we only care about the annotated ones
+ and gij.org_abbrev = ':ORG_ABBREV'
+ and stats.org_abbrev = ':ORG_ABBREV'
+), exptJunctionCounts as ( -- junction count per gene and confidence class
+ select gene_source_id
+ , conf
+ , count(*) as intron_count
+ from exptJunctions
+ group by gene_source_id
+ , conf
+)
+select aj.gene_source_id -- high-confidence support - All-high when every annotated intron has a High junction
+ , CASE WHEN aj.annotated_count = ejc.intron_count THEN 'All-high' ELSE 'Any-high' END as string_value
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ from annotatedJunctions aj
+ left join exptJunctionCounts ejc on aj.gene_source_id = ejc.gene_source_id
+ where ejc.conf = 'High' AND ejc.intron_count > 0
+UNION ALL -- the two branches emit disjoint string_value sets and one row per gene each, so no de-duplication is needed
+select aj.gene_source_id -- any-confidence support - High and Low counts are summed per gene
+ , CASE WHEN aj.annotated_count = sum(ejc.intron_count) THEN 'All-low' ELSE 'Any-low' END as string_value
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+ from annotatedJunctions aj
+ left join exptJunctionCounts ejc on aj.gene_source_id = ejc.gene_source_id
+group by aj.gene_source_id, aj.annotated_count
+having sum(ejc.intron_count) > 0
:DECLARE_PARTITION;
-
diff --git a/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql
new file mode 100644
index 0000000000..35b6c6318f
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql
@@ -0,0 +1,23 @@
+/*
+ The tuning table this was based on used a loop over the nasequences. We can add that back if slow but I am:
+ 1. using geneattributes which will make this faster than joining genefeature to nalocation and grouping
+ 2. using input tables here that are already partitioned
+*/
+:CREATE_AND_POPULATE
+SELECT ga.na_sequence_id
+ , ga.start_min
+ , ga.is_reversed
+ , ga.end_max
+ , ga.na_feature_id
+ , ga.source_id
+ , round(sum(nafe.value)::NUMERIC,2) as total_expression -- summed over every expression node mapped to a junction set
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev -- GeneMaxJunction filters this table on org_abbrev
+ , current_timestamp as modification_date
+ FROM :SCHEMA.GeneAttributes ga
+ INNER JOIN results.nafeatureexpression nafe ON ga.na_feature_id = nafe.na_feature_id
+ INNER JOIN :SCHEMA.JunctionToCoverageProfileMapping je ON nafe.protocol_app_node_id = je.exp_pan_id AND je.org_abbrev = ':ORG_ABBREV'
+ WHERE ga.org_abbrev = ':ORG_ABBREV'
+ GROUP BY ga.na_sequence_id, ga.start_min, ga.is_reversed, ga.end_max, ga.na_feature_id, ga.source_id
+:DECLARE_PARTITION
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql
new file mode 100644
index 0000000000..a2d30c4cba
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql
@@ -0,0 +1,5 @@
+CREATE INDEX gnidloc_nafid_ix ON :SCHEMA.JunctionGeneLocation (na_feature_id)
+ ;
+-- Wide index over every column selected by JunctionGeneLocation.psql
+create index gnattidloc_pk_ix on :SCHEMA.JunctionGeneLocation (na_sequence_id, start_min, is_reversed, end_max, na_feature_id, source_id, total_expression)
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql
new file mode 100644
index 0000000000..462937437d
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql
@@ -0,0 +1,143 @@
+drop table if exists :SCHEMA.:ORG_ABBREVJunctionExpressionTmp;
+
+drop table if exists :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp;
+
+
+/*
+ Join junctions to Expression values and get multiplier and decide on strand switching
+*/
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVJunctionExpressionTmp AS
+ WITH stats AS ( -- per junction node, the factor that scales unique reads to reads per million
+ SELECT protocol_app_node_id
+ --, 'total' as type
+ --, count(*) as total_junctions
+ --, sum(unique_reads) as total_reads
+ , round(1000000/sum(unique_reads),4) as multiplier
+ FROM apidb.IntronJunction ij, dots.nasequence s
+ WHERE unique_reads >= 1
+ AND ij.na_sequence_id = s.na_sequence_id
+ AND s.taxon_id = :TAXON_ID
+ GROUP BY protocol_app_node_id
+ ), ij AS ( -- expression nodes that share an input node with a junction node, with their average expression value
+ SELECT pj.output_pan_id as junctions_pan_id
+ , p.output_pan_id as expression_pan_id
+ , avg(nafe.value) as avg_value,pan.name as exp_name
+ , regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name
+ FROM :SCHEMA.panio p
+ , :SCHEMA.panio pj
+ , results.nafeatureexpression nafe
+ , study.protocolappnode pan
+ WHERE pj.output_pan_id in (select distinct protocol_app_node_id from stats)
+ AND pj.input_pan_id = p.input_pan_id
+ AND p.output_pan_id = pan.protocol_app_node_id
+ AND pan.name like '%tpm - unique%' -- NOTE: probably better to use raw counts here??
+ AND p.output_pan_id = nafe.protocol_app_node_id
+ AND p.org_abbrev = ':ORG_ABBREV'
+ AND pj.org_abbrev = ':ORG_ABBREV'
+ GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
+ ) , part AS ( -- collapse expression nodes that tie on avg_value to a single candidate per junction node
+ SELECT
+ ij.junctions_pan_id
+ , ij.avg_value
+ , stats.multiplier
+ , max(ij.expression_pan_id) OVER w as max_exp_pan_id
+ , max(ij.sample_name) OVER w as max_sample_Name
+ , max(ij.exp_name) OVER w as max_exp_name
+ FROM ij, stats
+ WHERE ij.junctions_pan_id = stats.protocol_app_node_id
+ WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
+ )
+ SELECT DISTINCT junctions_pan_id -- keep, per junction node, the candidate with the highest avg_value
+ , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
+ , first_value(max_sample_name) OVER w1 as sample_name
+ , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
+ , multiplier
+ FROM part
+ WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
+ ;
+
+create index :ORG_ABBREVjunexpgijtmp_ix on :SCHEMA.:ORG_ABBREVJunctionExpressionTmp(junctions_pan_id,exp_pan_id)
+ ;
+
+
+/*
+ Grab general mapping stats
+*/
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp ( -- one row of averaged mapping statistics per junction node
+ junctions_pan_id,
+ read_length,
+ mapped_reads,
+ avg_mapping_coverage,
+ num_replicates
+) AS
+ SELECT junctions_pan_id
+ , round(avg(average_read_length - 2),1) as read_length
+ , round(avg(number_mapped_reads),1) as mapped_reads
+ , round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) as avg_mapping_coverage -- coverage rescaled by the same 2-base reduction applied to read_length
+ , count(*) as num_replicates -- number of input nodes that carry all three characteristics
+ FROM (SELECT je.junctions_pan_id
+ , ca.value::NUMERIC as average_read_length
+ , cb.value::NUMERIC as number_mapped_reads
+ , cc.value::NUMERIC as avg_mapping_coverage
+ FROM :SCHEMA.:ORG_ABBREVJunctionExpressionTmp je
+ , :SCHEMA.PANIO ioa
+ , STUDY.CHARACTERISTIC ca
+ , STUDY.CHARACTERISTIC cb
+ , sres.ontologyterm ota
+ , sres.ontologyterm otb,
+ STUDY.CHARACTERISTIC cc, sres.ontologyterm otc
+ WHERE je.junctions_pan_id = ioa.output_pan_id
+ AND ioa.input_pan_id = ca.protocol_app_node_id -- characteristics are read from the input nodes of each junction node
+ AND ca.value is not null
+ AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID
+ AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length'
+ AND ca.protocol_app_node_id = cb.protocol_app_node_id
+ AND cb.value is not null
+ AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID
+ AND otb.source_id IN ('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads'
+ AND ca.protocol_app_node_id = cc.protocol_app_node_id
+ AND cc.value is not null
+ AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID
+ AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage'
+ AND ioa.org_abbrev = ':ORG_ABBREV'
+ ) t
+ GROUP by junctions_pan_id
+;
+
+
+CREATE INDEX :ORG_ABBREVmpstats_pk_ix on :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp
+ (junctions_pan_id,read_length,mapped_reads,avg_mapping_coverage,num_replicates)
+;
+
+
+
+/*
+ This is the main table
+*/
+:CREATE_AND_POPULATE
+ SELECT DISTINCT je.sample_name
+ , je.junctions_pan_id
+ , je.exp_pan_id
+ , ms.read_length
+ , ms.mapped_reads
+ , ms.avg_mapping_coverage
+ , ms.num_replicates
+ , je.switch_strands
+ , je.multiplier
+ , ':PROJECT_ID' as project_id
+ , ':ORG_ABBREV' as org_abbrev
+ , current_timestamp as modification_date
+
+ FROM :SCHEMA.:ORG_ABBREVJunctionExpressionTmp je -- the two staging tables built earlier in this script
+ INNER JOIN :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp ms
+ ON je.junctions_pan_id = ms.junctions_pan_id
+:DECLARE_PARTITION
+ ;
+
+
+drop table :SCHEMA.:ORG_ABBREVJunctionExpressionTmp
+;
+
+drop table :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp
+;
diff --git a/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql
new file mode 100644
index 0000000000..c724f94d67
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql
@@ -0,0 +1,2 @@
+-- Same key columns as the index on the staging table this data comes from
+CREATE INDEX JunctionCovMap_ix ON :SCHEMA.JunctionToCoverageProfileMapping (junctions_pan_id, exp_pan_id);
diff --git a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
deleted file mode 100644
index e0d672aa7c..0000000000
--- a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql
+++ /dev/null
@@ -1,13 +0,0 @@
-:CREATE_AND_POPULATE
-
- SELECT DISTINCT pn.pathway_node_id
- , tp.gene_source_id, tp.project_id, tp.org_abbrev, current_timestamp as modification_date
- FROM :SCHEMA.transcriptpathway tp
- , sres.pathwaynode pn
- WHERE tp.pathway_id = pn.pathway_id
- AND tp.ec_number_gene like replace(pn.display_label, '-', '%')
- and tp.org_abbrev = ':ORG_ABBREV'
-
-
-:DECLARE_PARTITION;
-
diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
index 21041cb21c..fbac42a0da 100644
--- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
+++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql
@@ -1,44 +1,28 @@
-:CREATE_AND_POPULATE
- SELECT t2.*, current_timestamp as modification_date FROM (
- SELECT DISTINCT gene_source_id
- , project_id
- , org_abbrev
- , pathway_source_id
- , pathway_name
- , count(reaction_source_id) as reactions
- , enzyme
- , expasy_url
- , pathway_source
- , exact_match
- FROM (
- SELECT DISTINCT tp.gene_source_id
- , tp.project_id
- , tp.pathway_source_id
- , tp.pathway_name
- , tp.org_abbrev
- , pr.reaction_source_id
- , pr.enzyme
- , pr.expasy_url
- , tp.pathway_source
- , CASE max(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match
- FROM :SCHEMA.TranscriptPathway tp
- , :SCHEMA.PathwayAttributes pa
- , :SCHEMA.PathwayCompounds pc
- , :SCHEMA.PathwayReactions pr
- WHERE tp.pathway_id = pa.pathway_id
- AND pc.pathway_id = pa.pathway_id
- AND pr.reaction_id = pc.reaction_id
- AND pr.ext_db_name = pc.ext_db_name
- AND tp.ec_number_pathway = pr.enzyme
- AND tp.wildcard_count_gene <= tp.wildcard_count_pathway
- AND pr.enzyme != '-.-.-.-'
- AND tp.org_abbrev = ':ORG_ABBREV'
- GROUP BY tp.gene_source_id, tp.project_id, tp.org_abbrev, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
- ) t
- GROUP BY gene_source_id, project_id, org_abbrev, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match
- ) t2
- ORDER BY pathway_source, lower(pathway_name)
-
+DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp;
+
+
+/* ATTENTION: This table is empty. We will populate it in the comparative graph as loading depends on TranscriptPathway.
+We are creating it here so that it is partitioned */
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp (
+ gene_source_id VARCHAR(80),
+ pathway_source_id VARCHAR(50),
+ pathway_name VARCHAR(150),
+ reactions NUMERIC(8),
+ enzyme VARCHAR(20),
+ expasy_url TEXT,
+ pathway_source TEXT,
+ exact_match VARCHAR(8),
+
+ --for partitioning
+ project_id VARCHAR(20),
+ org_abbrev VARCHAR(20),
+ modification_date timestamp
+);
+
+:CREATE_AND_POPULATE
+SELECT pgt.* FROM :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp pgt
:DECLARE_PARTITION;
+DROP TABLE :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp;
diff --git a/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql b/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql
new file mode 100644
index 0000000000..4c50716bf7
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql
@@ -0,0 +1,33 @@
+drop table if exists :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp;
+
+/*
+ATTENTION: This script is used to make an empty table. It will be populated with a perl script.
+
+We make the table here in the usual way so we get a partitioned webready table
+*/
+
+
+create unlogged table :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp (
+ location numeric(10),
+ type varchar(20),
+ na_sequence_id numeric(10),
+ is_unique numeric(1),
+ sum_cpm float8,
+ dist_to_first_atg numeric(10),
+ gene_source_id varchar(80),
+ transcript_source_id varchar(80),
+ dist_to_cds numeric(10),
+ is_dominant numeric(1),
+ strand char(1),
+ project_id varchar(20), -- this and the next two columns are the ones sibling scripts mark as required for partitioning
+ org_abbrev varchar(20),
+ modification_date timestamp
+ );
+
+
+:CREATE_AND_POPULATE
+select * from :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp
+:DECLARE_PARTITION;
+
+drop table :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp
+;
diff --git a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene_ix.psql b/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript_ix.psql
similarity index 100%
rename from Model/lib/psql/webready/orgSpecific/PathwayNodeGene_ix.psql
rename to Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript_ix.psql
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
index 1e47c1624b..c6ca34e893 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql
@@ -37,6 +37,7 @@
COALESCE(preferred_name.name, any_name.name) AS gene_name,
cast(coalesce(preferred_gene_product.product, any_gene_product.product, gf.product)
as VARCHAR(300)) as old_gene_product,
+ COALESCE(gp.product, 'unspecified product') as gene_product,
REPLACE(so.name, '_', ' ') AS gene_type,
gf.name as gene_ebi_biotype,
gi.gene_id,
@@ -79,6 +80,7 @@
INNER JOIN apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id
INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id
INNER JOIN :SCHEMA.GeneLocations gloc ON gf.source_id = gloc.source_id and gloc.org_abbrev = ':ORG_ABBREV'
+ LEFT JOIN :SCHEMA.GeneProduct gp ON gf.source_id = gp.source_id and gp.org_abbrev = ':ORG_ABBREV'
INNER JOIN sres.ExternalDatabaseRelease edr ON gf.external_database_release_id = edr.external_database_release_id
INNER JOIN sres.ExternalDatabase ed ON edr.external_database_id = ed.external_database_id
INNER JOIN :SCHEMA.GenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id and gsa.org_abbrev = ':ORG_ABBREV'
@@ -204,7 +206,7 @@
) olds ON gf.na_feature_id = olds.na_feature_id
WHERE nl.is_top_level = 1
AND nl.feature_type = 'GeneFeature'
- AND (gsa.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0)
+ AND gsa.taxon_id = :TAXON_ID
AND species_name.name_class = 'scientific name'
AND (gf.is_predicted != 1 OR gf.is_predicted is null)
AND tn.name_class = 'scientific name'
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptEC.psql b/Model/lib/psql/webready/orgSpecific/TranscriptEC.psql
new file mode 100644
index 0000000000..e979f94986
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptEC.psql
@@ -0,0 +1,35 @@
+
+DROP TABLE if exists :SCHEMA.:ORG_ABBREVTranscriptECTmp;
+/*
+This script creates an empty, typed staging table, selects from it between the :CREATE_AND_POPULATE and :DECLARE_PARTITION markers, then drops it.
+ATTENTION: This table is empty. We will populate it in the comparative graph so that we can include ortho-derived EC numbers.
+We are creating it here so that it is partitioned.
+*/
+
+/* This table maps EC numbers to transcripts */
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVTranscriptECTmp (
+ source_id VARCHAR(80),
+ gene_source_id VARCHAR(80),
+ enzyme_class_id NUMERIC(12),
+ ec_number VARCHAR(16),
+ ec_number_1 NUMERIC(3),
+ ec_number_2 NUMERIC(3),
+ ec_number_3 NUMERIC(3),
+ ec_number_4 NUMERIC(3),
+ wildcard_count NUMERIC(3), -- NOTE(review): presumably the number of '-' placeholders in ec_number; confirm against the populating step
+ evidence_code VARCHAR(255),
+
+ -- fields required for partitioning
+ project_id VARCHAR(20),
+ org_abbrev VARCHAR(20),
+ modification_date timestamp
+
+);
+
+
+:CREATE_AND_POPULATE
+select tet.* from :SCHEMA.:ORG_ABBREVTranscriptECTmp tet
+:DECLARE_PARTITION;
+
+DROP TABLE :SCHEMA.:ORG_ABBREVTranscriptECTmp;
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql
new file mode 100644
index 0000000000..5b9767528f
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql
@@ -0,0 +1,5 @@
+CREATE INDEX TranscriptEc_1_ix on :SCHEMA.TranscriptEC (source_id, ec_number) -- composite index led by source_id
+ ;
+
+CREATE INDEX TranscriptEc_2_ix on :SCHEMA.TranscriptEC (ec_number, source_id) -- same two columns, led by ec_number
+ ;
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql
new file mode 100644
index 0000000000..09facffb8c
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql
@@ -0,0 +1,23 @@
+
+DROP TABLE if exists :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp;
+/*
+This script creates an empty, typed staging table, selects from it between the :CREATE_AND_POPULATE and :DECLARE_PARTITION markers, then drops it.
+ATTENTION: This table is empty. We will populate it in the comparative graph.
+We are creating it here so that it is partitioned.
+*/
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp (
+ source_id VARCHAR(80),
+ gene_id VARCHAR(80),
+ group_id VARCHAR(16),
+ project_id varchar(20), -- project_id, org_abbrev, modification_date: fields required for partitioning
+ org_abbrev varchar(20),
+ modification_date timestamp
+);
+
+:CREATE_AND_POPULATE
+SELECT ogt.* from :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp ogt
+:DECLARE_PARTITION;
+
+
+DROP TABLE :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp;
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql
new file mode 100644
index 0000000000..ac9b3670e0
--- /dev/null
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql
@@ -0,0 +1,2 @@
+alter table :SCHEMA.TranscriptOrthologGroup -- declares source_id as the primary key of this table
+ add constraint TranscriptOrthologGroup_pk primary key (source_id); -- NOTE(review): PostgreSQL requires a primary key on a partitioned table to include every partition-key column; confirm source_id alone is valid for how this table is partitioned
diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
index 3280209fa6..f63449f735 100644
--- a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
+++ b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql
@@ -1,79 +1,40 @@
-:CREATE_AND_POPULATE
- WITH transcript_ec AS (
- SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
- regexp_count( ec.ec_number, '-') as wildcard_count
- FROM sres.EnzymeClass ec
- WHERE enzyme_class_id IN (SELECT aseqEc.enzyme_class_id
- FROM dots.AaSequenceEnzymeClass aseqEc, dots.aasequence seq
- WHERE aseqEc.aa_sequence_id = seq.aa_sequence_id
- AND seq.taxon_id = :TAXON_ID)
- GROUP BY ec.enzyme_class_id
- ),
- pathway_node_ec AS (
- SELECT distinct pn.pathway_id, pn.row_id as enzyme_class_id
- FROM sres.PathwayNode pn, sres.ontologyterm ot
- WHERE pn.pathway_node_type_id = ot.ontology_term_id
- AND ot.name = 'enzyme'
- AND pn.display_label != '-.-.-.-'
- ),
- pathway_ec AS (
- SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
- regexp_count( ec.ec_number, '-') as wildcard_count
- FROM sres.EnzymeClass ec
- WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM pathway_node_ec)
- GROUP BY ec.enzyme_class_id
- ),
- ec_match AS (
- SELECT tec.enzyme_class_id as transcript_enzyme_class_id,
- pec.enzyme_class_id as pathway_enzyme_class_id,
- tec.wildcard_count as wildcard_count_transcript,
- pec.wildcard_count as wildcard_count_pathway,
- tec.ec_number as ec_number_transcript,
- pec.ec_number as ec_number_pathway
- FROM transcript_ec tec, pathway_ec pec
- WHERE (tec.ec_number_1 = pec.ec_number_1 or tec.ec_number_1 is null or pec.ec_number_1 is null)
- AND (tec.ec_number_2 = pec.ec_number_2 or tec.ec_number_2 is null or pec.ec_number_2 is null)
- AND (tec.ec_number_3 = pec.ec_number_3 or tec.ec_number_3 is null or pec.ec_number_3 is null)
- AND (tec.ec_number_4 = pec.ec_number_4 or tec.ec_number_4 is null or pec.ec_number_4 is null)
- )
- SELECT DISTINCT
- ':PROJECT_ID' as project_id
- , ':ORG_ABBREV' as org_abbrev
- , CURRENT_TIMESTAMP as modification_date
- , ga.source_id
- , ga.gene_source_id
- , pa.source_id as pathway_source_id
- , pa.name as pathway_name
- , ec_match.ec_number_transcript as ec_number_gene
- , ec_match.wildcard_count_transcript as wildcard_count_gene
- , ec_match.ec_number_pathway
- , ec_match.wildcard_count_pathway
- , CASE WHEN ec_match.ec_number_pathway = ec_match.ec_number_transcript
- THEN 1
- ELSE 0 END as exact_match
- , CASE WHEN ec_match.wildcard_count_pathway + ec_match.wildcard_count_transcript = 0
- THEN 1
- ELSE 0 END as complete_ec
- , pa.pathway_id
- , pa.pathway_source
- , p.external_database_release_id
- FROM :SCHEMA.PathwayAttributes pa
- , sres.pathway p
- , pathway_node_ec pec
- , ec_match
- , dots.AaSequenceEnzymeClass asec
- , :SCHEMA.TranscriptAttributes ga
- WHERE ga.org_abbrev = ':ORG_ABBREV'
- AND pa.pathway_id = pec.pathway_id
- AND p.pathway_id = pa.pathway_id
- AND pec.enzyme_class_id = ec_match.pathway_enzyme_class_id
- AND asec.enzyme_class_id = ec_match.transcript_enzyme_class_id
- AND ga.aa_sequence_id = asec.aa_sequence_id
- -- TODO: Need new downstream table to bring in OrthoMCLDerived EC associations
- -- AND (
- -- (ga.orthomcl_name IS NULL AND asec.evidence_code != 'OrthoMCLDerived')
- -- OR ga.orthomcl_name IS NOT NULL
- -- )
- ;
+DROP TABLE if exists :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp;
+/*
+This script creates an empty, typed staging table that the :CREATE_AND_POPULATE step below selects from.
+ATTENTION: This table is empty. We will populate it in the comparative graph so that we can include ortho-derived EC numbers.
+We are creating it here so that it is partitioned.
+*/
+
+/* Staging table carrying the column layout of the final TranscriptPathway table */
+
+CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp (
+ -- transcript information
+ source_id VARCHAR(80),
+ gene_source_id VARCHAR(80),
+
+ -- pathway information
+ pathway_source_id VARCHAR(50),
+ pathway_name VARCHAR(150),
+ pathway_id NUMERIC(12),
+ pathway_source TEXT, -- TODO should this be a varchar?
+ external_database_release_id NUMERIC(10),
+ -- match information
+ ec_number_gene VARCHAR(16),
+ wildcard_count_gene NUMERIC(3),
+ ec_number_pathway VARCHAR(16),
+ wildcard_count_pathway NUMERIC(3),
+ exact_match NUMERIC(3), -- NOTE(review): the query this file replaces set 1 when ec_number_pathway = the transcript EC number, else 0; confirm the new populating step keeps that
+ complete_ec NUMERIC(3), -- NOTE(review): the replaced query set 1 when both wildcard counts summed to 0, else 0; confirm
+
+ -- fields required for partitioning
+ project_id varchar(20),
+ org_abbrev varchar(20),
+ modification_date timestamp
+);
+
+:CREATE_AND_POPULATE
+SELECT tpt.* from :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp tpt
:DECLARE_PARTITION;
+
+DROP TABLE :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp;
diff --git a/Model/lib/psql/webready/unknown/NameMappingGIJ.psql b/Model/lib/psql/webready/unknown/NameMappingGIJ.psql
deleted file mode 100644
index d21a1b1cf4..0000000000
--- a/Model/lib/psql/webready/unknown/NameMappingGIJ.psql
+++ /dev/null
@@ -1,118 +0,0 @@
- DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVJunExpGIJtmp;
-
- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVJunExpGIJtmp AS
- WITH ij AS (
- SELECT pj.output_pan_id as junctions_pan_id, p.output_pan_id as expression_pan_id, avg(nafe.value) as avg_value,pan.name as exp_name,
- regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name
- FROM :SCHEMA.panio p, :SCHEMA.panio pj, results.nafeatureexpression nafe, study.protocolappnode pan
- WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
- AND pj.input_pan_id = p.input_pan_id
- AND p.output_pan_id = pan.protocol_app_node_id
- AND pan.name like '%tpm - unique%'
- AND p.output_pan_id = nafe.protocol_app_node_id
- and p.org_abbrev = ':ORG_ABBREV'
- and pj.org_abbrev = ':ORG_ABBREV'
- GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
- ORDER BY pj.output_pan_id
- ) , stats AS (
- SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
- sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier
- FROM apidb.IntronJunction
- WHERE unique_reads >= 1
- GROUP BY protocol_app_node_id
- ), part AS (
- SELECT
- ij.junctions_pan_id, ij.avg_value, stats.multiplier
- , max(ij.expression_pan_id) OVER w as max_exp_pan_id
- , max(ij.sample_name) OVER w as max_sample_Name
- , max(ij.exp_name) OVER w as max_exp_name
- FROM ij, stats
- WHERE ij.junctions_pan_id = stats.protocol_app_node_id
- WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
- )
- SELECT DISTINCT * FROM (
- SELECT junctions_pan_id
- , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
- , first_value(max_sample_name) OVER w1 as sample_name
- , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
- , multiplier
- FROM part
- WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
- ) t
- ORDER BY junctions_pan_id
-
- ;
-
-
-
- create index junexpgijtmp_ix on :SCHEMA.:ORG_ABBREVJunExpGIJtmp(junctions_pan_id,exp_pan_id)
-
- ;
-
- drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp;
-
- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp (
- junctions_pan_id,
- read_length,
- mapped_reads,
- avg_mapping_coverage,
- num_replicates
- ) AS
- SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
- round(avg(number_mapped_reads),1) as mapped_reads,
- round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
- as avg_mapping_coverage,
- count(*) as num_replicates
- FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
- cb.value::NUMERIC as number_mapped_reads,
- cc.value::NUMERIC as avg_mapping_coverage
- FROM :SCHEMA.:ORG_ABBREVjunexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
- :SCHEMA.PANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb,
- STUDY.CHARACTERISTIC cc, sres.ontologyterm otc
- WHERE je.junctions_pan_id = ioa.output_pan_id
- AND ioa.input_pan_id = ca.protocol_app_node_id
- AND ca.value is not null
- and ioa.org_abbrev = ':ORG_ABBREV'
- AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID
- AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length'
- AND ca.protocol_app_node_id = cb.protocol_app_node_id
- AND cb.value is not null
- AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID
- AND otb.source_id IN ('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads'
- AND ca.protocol_app_node_id = cc.protocol_app_node_id
- AND cc.value is not null
- AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID
- AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage'
- ) t
- GROUP by Junctions_Pan_Id
-
- ;
-
-
-:CREATE_AND_POPULATE
- SELECT DISTINCT edp.dataset_presenter_display_name as exp_name,
- edp.external_database_name, je.sample_name,
- je.junctions_pan_id, je.exp_pan_id,
- substr(dp.value, 1, 4000) as presenter_switch_strands,
- substr(sj.value, 1, 4000) as show_intron_junctions,
- substr(uj.value, 1, 4000) as include_unified_junctions,
- ms.read_length, ms.mapped_reads, ms.avg_mapping_coverage, ms.num_replicates,
- je.switch_strands, je.multiplier
- FROM :SCHEMA.:ORG_ABBREVjunexpgijtmp je, study.nodeNodeSet sl, study.NodeSet s, ExternalDbDatasetPresenter edp,
- DatasetProperty dp, DatasetProperty sj, DatasetProperty uj, mappingstatsgijtmp ms
- WHERE sl.protocol_app_node_id = je.junctions_pan_id
- AND je.junctions_pan_id = ms.junctions_pan_id
- AND s.node_set_id = sl.node_set_id
- AND s.node_type like 'junctions'
- AND s.external_database_release_id = edp.external_database_release_id
- AND dp.dataset_presenter_id = edp.dataset_presenter_id
- AND dp.property = 'switchStrandsProfiles'
- AND sj.dataset_presenter_id = edp.dataset_presenter_id
- AND sj.property = 'showIntronJunctions'
- AND uj.dataset_presenter_id = edp.dataset_presenter_id
- AND uj.property = 'includeInUnifiedJunctions'
- AND (substr(sj.value, 1, 10) = 'true' or substr(uj.value, 1, 10) = 'true')
-
-
-:DECLARE_PARTITION;
-
diff --git a/Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql b/Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql
deleted file mode 100644
index c386e6ba00..0000000000
--- a/Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql
+++ /dev/null
@@ -1,3 +0,0 @@
- create index namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id)
- ;
-