From 55e982a133a3eec08cc79878b8b0c0a7ca2c3662 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 9 May 2025 12:51:53 -0400 Subject: [PATCH 001/112] add initial web table psql files --- .../lib/psql/webtables/??/AlphaFoldGenes.psql | 103 +++ .../psql/webtables/??/AssociatedDataset.psql | 0 .../lib/psql/webtables/??/DatasetDetail.psql | 58 ++ .../psql/webtables/??/DatasetPresenter.psql | 0 .../psql/webtables/??/DomainAssignment.psql | 69 ++ Model/lib/psql/webtables/??/EdaGeneGraph.psql | 0 .../psql/webtables/??/EupathBuildDates.psql | 0 .../??/ExternalDbDatasetPresenter.psql | 46 + .../??/ExternalSequenceTaxonRank.psql | 60 ++ .../psql/webtables/??/GeneGroupProfile.psql | 30 + .../webtables/??/GenomicSeqAttributes.psql | 102 +++ .../??/GroupPhylogeneticProfile.psql | 20 + .../webtables/??/OrthologousTranscripts.psql | 73 ++ Model/lib/psql/webtables/??/PANExtDbRls.psql | 36 + Model/lib/psql/webtables/??/PANIO.psql | 59 ++ Model/lib/psql/webtables/??/PANResults.psql | 91 ++ .../psql/webtables/??/PhyleticPattern.psql | 395 +++++++++ Model/lib/psql/webtables/??/ProjectTaxon.psql | 151 ++++ .../psql/webtables/??/SequenceAttributes.psql | 131 +++ .../webtables/??/SequenceEnzymeClass.psql | 36 + .../webtables/??/SequencePieceClosure.psql | 48 ++ .../psql/webtables/??/StudyIdDatasetId.psql | 24 + .../psql/webtables/??/TypeAheadCounts.psql | 42 + .../psql/webtables/MG/CompoundAttributes.psql | 28 + Model/lib/psql/webtables/MG/CompoundId.psql | 39 + .../psql/webtables/MG/CompoundProperties.psql | 29 + .../psql/webtables/MG/CompoundTypeAheads.psql | 15 + .../webtables/MG/GroupDomainAttribute.psql | 32 + .../lib/psql/webtables/MG/OntologyLevels.psql | 48 ++ .../psql/webtables/MG/PathwayAttributes.psql | 68 ++ .../psql/webtables/MG/PathwayCompounds.psql | 91 ++ Model/lib/psql/webtables/MG/PathwayNodes.psql | 306 +++++++ .../psql/webtables/MG/PathwayReactions.psql | 141 +++ .../psql/webtables/MO/ChIPchipTranscript.psql | 44 + .../webtables/MO/ChIPchipTranscript_ix.psql | 
7 + .../lib/psql/webtables/MO/ChrCopyNumbers.psql | 19 + .../psql/webtables/MO/ChrCopyNumbers_ix.psql | 16 + .../lib/psql/webtables/MO/CodingSequence.psql | 14 + .../psql/webtables/MO/CodingSequence_ix.psql | 7 + .../webtables/MO/DatasetExampleSourceId.psql | 23 + .../MO/DatasetExampleSourceId_ix.psql | 0 Model/lib/psql/webtables/MO/EqtlSpan.psql | 25 + Model/lib/psql/webtables/MO/EqtlSpan_ix.psql | 8 + .../webtables/MO/EstAlignmentGeneSummary.psql | 68 ++ .../MO/EstAlignmentGeneSummary_ix.psql | 20 + .../lib/psql/webtables/MO/EstAttributes.psql | 51 ++ .../psql/webtables/MO/EstAttributes_ix.psql | 7 + Model/lib/psql/webtables/MO/EstSequence.psql | 16 + .../lib/psql/webtables/MO/EstSequence_ix.psql | 7 + .../lib/psql/webtables/MO/GeneAttributes.psql | 113 +++ .../psql/webtables/MO/GeneAttributes_ix.psql | 90 ++ .../psql/webtables/MO/GeneCopyNumbers.psql | 29 + .../psql/webtables/MO/GeneCopyNumbers_ix.psql | 8 + Model/lib/psql/webtables/MO/GeneGoTable.psql | 26 + .../lib/psql/webtables/MO/GeneGoTable_ix.psql | 9 + Model/lib/psql/webtables/MO/GeneGoTerms.psql | 39 + .../lib/psql/webtables/MO/GeneGoTerms_ix.psql | 10 + Model/lib/psql/webtables/MO/GeneId.psql | 265 ++++++ Model/lib/psql/webtables/MO/GeneId_ix.psql | 35 + .../psql/webtables/MO/GeneIntJuncStats.psql | 25 + .../webtables/MO/GeneIntJuncStats_ix.psql | 6 + .../psql/webtables/MO/GeneIntronJunction.psql | 223 +++++ .../webtables/MO/GeneIntronJunction_ix.psql | 21 + .../lib/psql/webtables/MO/GeneLocations.psql | 22 + .../psql/webtables/MO/GeneLocations_ix.psql | 8 + .../psql/webtables/MO/GeneMaxIntronGIJ.psql | 47 + .../webtables/MO/GeneMaxIntronGIJ_ix.psql | 6 + .../lib/psql/webtables/MO/GeneModelDump.psql | 32 + .../psql/webtables/MO/GeneModelDump_ix.psql | 9 + .../psql/webtables/MO/GeneSummaryFilter.psql | 14 + .../webtables/MO/GeneSummaryFilter_ix.psql | 0 .../psql/webtables/MO/GenomicSequenceId.psql | 26 + .../webtables/MO/GenomicSequenceId_ix.psql | 21 + .../webtables/MO/GenomicSequenceSequence.psql | 14 
+ .../MO/GenomicSequenceSequence_ix.psql | 7 + .../lib/psql/webtables/MO/GoTermSummary.psql | 56 ++ .../psql/webtables/MO/GoTermSummary_ix.psql | 16 + .../psql/webtables/MO/IntronSupportLevel.psql | 119 +++ .../webtables/MO/IntronSupportLevel_ix.psql | 0 .../psql/webtables/MO/IntronUtrCoords.psql | 35 + .../psql/webtables/MO/IntronUtrCoords_ix.psql | 16 + .../lib/psql/webtables/MO/NameMappingGIJ.psql | 125 +++ .../psql/webtables/MO/NameMappingGIJ_ix.psql | 6 + .../webtables/MO/OrganismAbbreviation.psql | 15 + .../MO/OrganismAbbreviationBlast.psql | 42 + .../MO/OrganismAbbreviationBlast_ix.psql | 0 .../webtables/MO/OrganismAbbreviation_ix.psql | 0 .../psql/webtables/MO/OrganismAttributes.psql | 327 +++++++ .../webtables/MO/OrganismAttributes_ix.psql | 7 + .../webtables/MO/OrganismSelectTaxonRank.psql | 49 ++ .../MO/OrganismSelectTaxonRank_ix.psql | 0 .../psql/webtables/MO/PathwayNodeGene.psql | 14 + .../psql/webtables/MO/PathwayNodeGene_ix.psql | 0 .../psql/webtables/MO/PathwaysGeneTable.psql | 44 + .../webtables/MO/PathwaysGeneTable_ix.psql | 9 + .../lib/psql/webtables/MO/PdbSimilarity.psql | 32 + .../psql/webtables/MO/PdbSimilarity_ix.psql | 0 Model/lib/psql/webtables/MO/Profile.psql | 800 ++++++++++++++++++ .../lib/psql/webtables/MO/ProfileSamples.psql | 167 ++++ .../psql/webtables/MO/ProfileSamples_ix.psql | 22 + Model/lib/psql/webtables/MO/ProfileType.psql | 13 + .../lib/psql/webtables/MO/ProfileType_ix.psql | 0 Model/lib/psql/webtables/MO/Profile_ix.psql | 24 + .../psql/webtables/MO/ProteinAttributes.psql | 207 +++++ .../webtables/MO/ProteinAttributes_ix.psql | 14 + .../psql/webtables/MO/ProteinSequence.psql | 15 + .../psql/webtables/MO/ProteinSequence_ix.psql | 7 + Model/lib/psql/webtables/MO/RnaSeqStats.psql | 55 ++ .../lib/psql/webtables/MO/RnaSeqStats_ix.psql | 0 .../webtables/MO/SignalPeptideDomains.psql | 41 + .../webtables/MO/SignalPeptideDomains_ix.psql | 16 + Model/lib/psql/webtables/MO/TFBSGene.psql | 46 + Model/lib/psql/webtables/MO/TFBSGene_ix.psql 
| 14 + Model/lib/psql/webtables/MO/TaxonSpecies.psql | 35 + .../psql/webtables/MO/TaxonSpecies_ix.psql | 0 Model/lib/psql/webtables/MO/Taxonomy.psql | 28 + Model/lib/psql/webtables/MO/Taxonomy_ix.psql | 9 + .../webtables/MO/TranscriptAttributes.psql | 440 ++++++++++ .../webtables/MO/TranscriptAttributes_ix.psql | 135 +++ .../webtables/MO/TranscriptCenDistance.psql | 19 + .../MO/TranscriptCenDistance_ix.psql | 8 + .../psql/webtables/MO/TranscriptPathway.psql | 112 +++ .../webtables/MO/TranscriptPathway_ix.psql | 18 + .../psql/webtables/MO/TranscriptSequence.psql | 11 + .../webtables/MO/TranscriptSequence_ix.psql | 7 + .../webtables/MO/TransmembraneDomains.psql | 27 + .../webtables/MO/TransmembraneDomains_ix.psql | 8 + 127 files changed, 7018 insertions(+) create mode 100644 Model/lib/psql/webtables/??/AlphaFoldGenes.psql create mode 100644 Model/lib/psql/webtables/??/AssociatedDataset.psql create mode 100644 Model/lib/psql/webtables/??/DatasetDetail.psql create mode 100644 Model/lib/psql/webtables/??/DatasetPresenter.psql create mode 100644 Model/lib/psql/webtables/??/DomainAssignment.psql create mode 100644 Model/lib/psql/webtables/??/EdaGeneGraph.psql create mode 100644 Model/lib/psql/webtables/??/EupathBuildDates.psql create mode 100644 Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql create mode 100644 Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql create mode 100644 Model/lib/psql/webtables/??/GeneGroupProfile.psql create mode 100644 Model/lib/psql/webtables/??/GenomicSeqAttributes.psql create mode 100644 Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql create mode 100644 Model/lib/psql/webtables/??/OrthologousTranscripts.psql create mode 100644 Model/lib/psql/webtables/??/PANExtDbRls.psql create mode 100644 Model/lib/psql/webtables/??/PANIO.psql create mode 100644 Model/lib/psql/webtables/??/PANResults.psql create mode 100644 Model/lib/psql/webtables/??/PhyleticPattern.psql create mode 100644 
Model/lib/psql/webtables/??/ProjectTaxon.psql create mode 100644 Model/lib/psql/webtables/??/SequenceAttributes.psql create mode 100644 Model/lib/psql/webtables/??/SequenceEnzymeClass.psql create mode 100644 Model/lib/psql/webtables/??/SequencePieceClosure.psql create mode 100644 Model/lib/psql/webtables/??/StudyIdDatasetId.psql create mode 100644 Model/lib/psql/webtables/??/TypeAheadCounts.psql create mode 100644 Model/lib/psql/webtables/MG/CompoundAttributes.psql create mode 100644 Model/lib/psql/webtables/MG/CompoundId.psql create mode 100644 Model/lib/psql/webtables/MG/CompoundProperties.psql create mode 100644 Model/lib/psql/webtables/MG/CompoundTypeAheads.psql create mode 100644 Model/lib/psql/webtables/MG/GroupDomainAttribute.psql create mode 100644 Model/lib/psql/webtables/MG/OntologyLevels.psql create mode 100644 Model/lib/psql/webtables/MG/PathwayAttributes.psql create mode 100644 Model/lib/psql/webtables/MG/PathwayCompounds.psql create mode 100644 Model/lib/psql/webtables/MG/PathwayNodes.psql create mode 100644 Model/lib/psql/webtables/MG/PathwayReactions.psql create mode 100644 Model/lib/psql/webtables/MO/ChIPchipTranscript.psql create mode 100644 Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql create mode 100644 Model/lib/psql/webtables/MO/ChrCopyNumbers.psql create mode 100644 Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql create mode 100644 Model/lib/psql/webtables/MO/CodingSequence.psql create mode 100644 Model/lib/psql/webtables/MO/CodingSequence_ix.psql create mode 100644 Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql create mode 100644 Model/lib/psql/webtables/MO/DatasetExampleSourceId_ix.psql create mode 100644 Model/lib/psql/webtables/MO/EqtlSpan.psql create mode 100644 Model/lib/psql/webtables/MO/EqtlSpan_ix.psql create mode 100644 Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql create mode 100644 Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql create mode 100644 
Model/lib/psql/webtables/MO/EstAttributes.psql create mode 100644 Model/lib/psql/webtables/MO/EstAttributes_ix.psql create mode 100644 Model/lib/psql/webtables/MO/EstSequence.psql create mode 100644 Model/lib/psql/webtables/MO/EstSequence_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneAttributes.psql create mode 100644 Model/lib/psql/webtables/MO/GeneAttributes_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneCopyNumbers.psql create mode 100644 Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneGoTable.psql create mode 100644 Model/lib/psql/webtables/MO/GeneGoTable_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneGoTerms.psql create mode 100644 Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneId.psql create mode 100644 Model/lib/psql/webtables/MO/GeneId_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneIntJuncStats.psql create mode 100644 Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneIntronJunction.psql create mode 100644 Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneLocations.psql create mode 100644 Model/lib/psql/webtables/MO/GeneLocations_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql create mode 100644 Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneModelDump.psql create mode 100644 Model/lib/psql/webtables/MO/GeneModelDump_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GeneSummaryFilter.psql create mode 100644 Model/lib/psql/webtables/MO/GeneSummaryFilter_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GenomicSequenceId.psql create mode 100644 Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql create mode 100644 
Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql create mode 100644 Model/lib/psql/webtables/MO/GoTermSummary.psql create mode 100644 Model/lib/psql/webtables/MO/GoTermSummary_ix.psql create mode 100644 Model/lib/psql/webtables/MO/IntronSupportLevel.psql create mode 100644 Model/lib/psql/webtables/MO/IntronSupportLevel_ix.psql create mode 100644 Model/lib/psql/webtables/MO/IntronUtrCoords.psql create mode 100644 Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql create mode 100644 Model/lib/psql/webtables/MO/NameMappingGIJ.psql create mode 100644 Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql create mode 100644 Model/lib/psql/webtables/MO/OrganismAbbreviation.psql create mode 100644 Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql create mode 100644 Model/lib/psql/webtables/MO/OrganismAbbreviationBlast_ix.psql create mode 100644 Model/lib/psql/webtables/MO/OrganismAbbreviation_ix.psql create mode 100644 Model/lib/psql/webtables/MO/OrganismAttributes.psql create mode 100644 Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql create mode 100644 Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql create mode 100644 Model/lib/psql/webtables/MO/OrganismSelectTaxonRank_ix.psql create mode 100644 Model/lib/psql/webtables/MO/PathwayNodeGene.psql create mode 100644 Model/lib/psql/webtables/MO/PathwayNodeGene_ix.psql create mode 100644 Model/lib/psql/webtables/MO/PathwaysGeneTable.psql create mode 100644 Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql create mode 100644 Model/lib/psql/webtables/MO/PdbSimilarity.psql create mode 100644 Model/lib/psql/webtables/MO/PdbSimilarity_ix.psql create mode 100644 Model/lib/psql/webtables/MO/Profile.psql create mode 100644 Model/lib/psql/webtables/MO/ProfileSamples.psql create mode 100644 Model/lib/psql/webtables/MO/ProfileSamples_ix.psql create mode 100644 Model/lib/psql/webtables/MO/ProfileType.psql create mode 100644 Model/lib/psql/webtables/MO/ProfileType_ix.psql create mode 100644 
Model/lib/psql/webtables/MO/Profile_ix.psql create mode 100644 Model/lib/psql/webtables/MO/ProteinAttributes.psql create mode 100644 Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql create mode 100644 Model/lib/psql/webtables/MO/ProteinSequence.psql create mode 100644 Model/lib/psql/webtables/MO/ProteinSequence_ix.psql create mode 100644 Model/lib/psql/webtables/MO/RnaSeqStats.psql create mode 100644 Model/lib/psql/webtables/MO/RnaSeqStats_ix.psql create mode 100644 Model/lib/psql/webtables/MO/SignalPeptideDomains.psql create mode 100644 Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql create mode 100644 Model/lib/psql/webtables/MO/TFBSGene.psql create mode 100644 Model/lib/psql/webtables/MO/TFBSGene_ix.psql create mode 100644 Model/lib/psql/webtables/MO/TaxonSpecies.psql create mode 100644 Model/lib/psql/webtables/MO/TaxonSpecies_ix.psql create mode 100644 Model/lib/psql/webtables/MO/Taxonomy.psql create mode 100644 Model/lib/psql/webtables/MO/Taxonomy_ix.psql create mode 100644 Model/lib/psql/webtables/MO/TranscriptAttributes.psql create mode 100644 Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql create mode 100644 Model/lib/psql/webtables/MO/TranscriptCenDistance.psql create mode 100644 Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql create mode 100644 Model/lib/psql/webtables/MO/TranscriptPathway.psql create mode 100644 Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql create mode 100644 Model/lib/psql/webtables/MO/TranscriptSequence.psql create mode 100644 Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql create mode 100644 Model/lib/psql/webtables/MO/TransmembraneDomains.psql create mode 100644 Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql diff --git a/Model/lib/psql/webtables/??/AlphaFoldGenes.psql b/Model/lib/psql/webtables/??/AlphaFoldGenes.psql new file mode 100644 index 0000000000..164c150fcb --- /dev/null +++ b/Model/lib/psql/webtables/??/AlphaFoldGenes.psql @@ -0,0 +1,103 @@ + + + CREATE TABLE 
uniprotGenes AS /* uniprotGenes: UniProt dbrefs linked to genes, each tagged with a preference rank and an AlphaFold residue span; hit_length is NULL when the LEFT JOIN to apidb.AlphaFold finds no row */ + SELECT DISTINCT ed.name /* branch 1: dbrefs attached to AA features (dots.dbrefaafeature); gene id comes from ProteinAttributes */ + , d.* + , edr.version + , aa.source_id + , pa.gene_source_id + , CASE WHEN (ed.name like '%SWISSPROT%' AND edr.version = 'xrefuniparc') THEN 1 + WHEN (ed.name like '%SPTREMBL%' AND edr.version = 'xrefuniparc') THEN 2 + WHEN (ed.name like '%SWISSPROT%' AND edr.version = 'xref_sprot_blastp') THEN 4 + WHEN (ed.name like '%SPTREMBL%' and edr.version = 'xref_trembl_blastp') THEN 5 + ELSE 6 END as rank /* lower value = preferred; minRank takes MIN(rank) per gene; value 3 is assigned by the second branch */ + , (af.last_residue_index - af.first_residue_index + 1) as hit_length + FROM sres.dbref d + LEFT JOIN apidb.AlphaFold af ON d.primary_identifier = af.uniprot_id + , sres.externaldatabase ed + , sres.externaldatabaserelease edr + , dots.dbrefaafeature db + , dots.aafeature aa + , ProteinAttributes pa + WHERE (ed.name = 'Uniprot/SWISSPROT' OR ed.name = 'Uniprot/SPTREMBL') + AND (edr.version = 'xrefuniparc' OR edr.version = 'xref_sprot_blastp' OR edr.version = 'xref_trembl_blastp') + AND edr.external_database_id = ed.external_database_id + AND d.external_database_release_id = edr.external_database_release_id + AND db.db_ref_id = d.db_ref_id + AND aa.aa_feature_id = db.aa_feature_id + AND pa.source_id = aa.source_id + UNION /* UNION (not UNION ALL): duplicates across the two branches are removed as well */ + SELECT DISTINCT ed.name /* branch 2: dbrefs attached to NA features (dots.dbrefnafeature), matched to TranscriptAttributes by transcript or gene source_id; always rank 3 */ + , d.* + , edr.version + , na.source_id + , ta.gene_source_id + , 3 as rank + , (af.last_residue_index - af.first_residue_index + 1) as hit_length + FROM sres.dbref d + LEFT JOIN apidb.AlphaFold af ON d.primary_identifier = af.uniprot_id + , sres.externaldatabase ed + , sres.externaldatabaserelease edr + , dots.dbrefnafeature db + , dots.nafeature na + , TranscriptAttributes ta + WHERE ed.name like '%_dbxref_%niprot_%RSRC' + AND edr.external_database_id = ed.external_database_id + AND d.external_database_release_id = edr.external_database_release_id + AND db.db_ref_id = d.db_ref_id + AND na.na_feature_id = db.na_feature_id + AND (ta.transcript_source_id = na.source_id OR ta.gene_source_id = na.source_id) + + ; + + + + CREATE UNLOGGED TABLE minRank AS ( /* minRank: lowest rank per gene, counted only over uniprotGenes rows that have an AlphaFold hit */ + SELECT gene_source_id + ,
MIN(rank) as min_rank + FROM uniprotGenes upg + WHERE hit_length is not null + GROUP BY gene_source_id + ) + + ; + + + + CREATE UNLOGGED TABLE alphaFoldHits AS ( /* alphaFoldHits: per gene, the UniProt id with the longest AlphaFold span among the gene's best-ranked xrefs */ + SELECT DISTINCT gene_source_id + , last_value(primary_identifier) over (PARTITION BY gene_source_id ORDER BY hit_length ASC, primary_identifier ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS uniprot_id /* primary_identifier tie-break makes the pick deterministic when spans are equal */ + FROM ( + SELECT upg.* + FROM uniprotGenes upg + , minRank + WHERE upg.gene_source_id = minRank.gene_source_id + AND upg.rank = minRank.min_rank AND upg.hit_length IS NOT NULL /* NULL sorts last under ASC, so an xref with no AlphaFold row would otherwise win last_value and the gene would vanish from AlphaFoldGenes */ + ) t + ) + + ; + + + + CREATE TABLE AlphaFoldGenes AS ( /* AlphaFoldGenes: AlphaFold rows for the UniProt id chosen per gene in alphaFoldHits */ + SELECT afh.gene_source_id + , af.uniprot_id + , af.source_id as alphafold_id + , af.alphafold_version + , af.first_residue_index + , af.last_residue_index + FROM apidb.alphafold af + , alphaFoldHits afh + WHERE afh.uniprot_id = af.uniprot_id + ) + + ; + + + + CREATE index AlphaFoldGenes_idx ON AlphaFoldGenes (gene_source_id, uniprot_id) + + + ; + diff --git a/Model/lib/psql/webtables/??/AssociatedDataset.psql b/Model/lib/psql/webtables/??/AssociatedDataset.psql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Model/lib/psql/webtables/??/DatasetDetail.psql b/Model/lib/psql/webtables/??/DatasetDetail.psql new file mode 100644 index 0000000000..1ec5eed7be --- /dev/null +++ b/Model/lib/psql/webtables/??/DatasetDetail.psql @@ -0,0 +1,58 @@ + + + CREATE TABLE DatasetDetail AS /* DatasetDetail: one space-separated search string per dataset presenter row */ + SELECT dataset_presenter_id, + concat_ws(' ', name, category, usage, /* concat_ws skips NULL arguments; the previous || chain returned NULL in PostgreSQL whenever any single field was NULL */ + caveat, acknowledgement, type, subtype, + summary, description, contact, + institution, pubmed_id, citation) as search_string + FROM ( + SELECT + sub.dataset_presenter_id as dataset_presenter_id, + sub.name as name, + sub.category as category, + sub.usage as usage, + sub.caveat as caveat, + sub.acknowledgement as acknowledgement, + sub.type as type, + sub.subtype as subtype, + sub.contact, + sub.institution, + sub.pubmed_id, + sub.citation, + dp.summary, + dp.description
+ FROM DatasetPresenter dp, + ( /* sub: one row per presenter and primary contact, with that presenter's pmids and citations space-joined; presenters without a primary contact or without any publication drop out because all three tables are inner-joined */ + SELECT DISTINCT + dp.dataset_presenter_id as dataset_presenter_id, + dp.display_name as name, + dp.display_category as category, + dp.usage as usage, + dp.caveat as caveat, + dp.acknowledgement as acknowledgement, + dp.type as type, + dp.subtype as subtype, + dc.name as contact, + dc.affiliation as institution, + string_agg(dpub.pmid, ' ' ORDER BY dpub.pmid) as pubmed_id, + -- CHECK AND FIX - regexp_like ISSUE + --string_agg(CASE WHEN REGEXP_LIKE(dpub.citation, '[[:digit:]]{4};') + -- THEN substr(citation, 1, regexp_instr(citation, '[[:digit:]]{4};' ) - 1) + -- ELSE dpub.citation + -- END , ' ' ORDER BY dpub.citation) as citation + string_agg(dpub.citation, ' ' ORDER BY dpub.citation) as citation + FROM DatasetPresenter dp, DatasetContact dc, + DatasetPublication dpub + WHERE dp.dataset_presenter_id = dc.dataset_presenter_id + AND dp.dataset_presenter_id = dpub.dataset_presenter_id + AND dc.is_primary_contact = true + GROUP by dp.dataset_presenter_id, dp.display_name,dp.display_category, + dp.usage,dp.caveat,dp.acknowledgement,dp.type,dp.subtype,dc.name, + dc.affiliation + ) sub + WHERE dp.dataset_presenter_id = sub.dataset_presenter_id /* re-join to DatasetPresenter only to pick up summary and description, which are not in the GROUP BY */ + ) t + + ; + diff --git a/Model/lib/psql/webtables/??/DatasetPresenter.psql b/Model/lib/psql/webtables/??/DatasetPresenter.psql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Model/lib/psql/webtables/??/DomainAssignment.psql b/Model/lib/psql/webtables/??/DomainAssignment.psql new file mode 100644 index 0000000000..7cb927d97e --- /dev/null +++ b/Model/lib/psql/webtables/??/DomainAssignment.psql @@ -0,0 +1,69 @@ + + + create table DomainAssignment as /* DomainAssignment: apidb.interproresults rows matched to SequenceAttributes on full_id = protein_source_id, restricted to PFAM by the filter that follows */ + select sa.full_id, sa.group_name, + r.interpro_primary_id as accession, + r.interpro_desc as description, + CAST (NULL as NUMERIC) as domain_index, /* placeholder; filled in by the UPDATE that follows the domainIndex table */ + sa.aa_sequence_id, + r.interpro_start_min as start_min, + r.interpro_end_min as end_max /* NOTE(review): end_max is sourced from interpro_end_min - confirm this is the intended column */ + from SequenceAttributes sa, apidb.interproresults r + where sa.full_id = r.protein_source_id + and
upper(r.interpro_db_name) = 'PFAM' + + ; + + + + create index domain_accession_ix + on DomainAssignment (accession, full_id, group_name) + + ; + + + + create table domainIndex as /* domainIndex: a sequential number for each distinct Pfam accession */ + select row_number() OVER (ORDER BY accession) as domain_index, accession /* explicit window ORDER BY: the order of a derived table is not guaranteed to reach an empty OVER () */ + from (select distinct accession + from DomainAssignment + order by accession) t /* derived tables need an alias before PostgreSQL 16 */ + + ; + + + + create index domainIdxIdx on DomainIndex(accession, domain_index) + + ; + + + + update DomainAssignment da /* intentionally no WHERE: every row receives the index of its accession */ + set domain_index = (select di.domain_index + from DomainIndex di + where di.accession = da.accession) + + ; + + + + create index domain_ix_ix + on DomainAssignment (domain_index, accession, full_id) + + ; + + + + create index domain_group_ix + on DomainAssignment (group_name, accession, full_id) + + ; + + + + create index domain_seq_ix + on DomainAssignment (aa_sequence_id, accession, full_id, group_name) + + ; + diff --git a/Model/lib/psql/webtables/??/EdaGeneGraph.psql b/Model/lib/psql/webtables/??/EdaGeneGraph.psql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Model/lib/psql/webtables/??/EupathBuildDates.psql b/Model/lib/psql/webtables/??/EupathBuildDates.psql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql b/Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql new file mode 100644 index 0000000000..0b8856e4e5 --- /dev/null +++ b/Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql @@ -0,0 +1,46 @@ + + + CREATE TABLE ExternalDbDatasetPresenter AS + SELECT ed.external_database_id, ed.name AS external_database_name, + edr.external_database_release_id, SUBSTR(edr.version, 1, 40) AS external_database_version, + dsp.dataset_presenter_id, dsp.name AS dataset_presenter_name, + dsp.display_name AS dataset_presenter_display_name + FROM sres.externalDatabaseRelease edr, sres.externalDatabase ed, DatasetPresenter dsp + WHERE ed.external_database_id = edr.external_database_id + AND (ed.name = dsp.name + OR ed.name LIKE dsp.dataset_name_pattern) +
ORDER BY ed.name + + ; + + + + create index edd_rlsidix /* keyed first on external_database_release_id */ + on ExternalDbDatasetPresenter + (external_database_release_id, external_database_id, external_database_name, + dataset_presenter_id, dataset_presenter_name, dataset_presenter_display_name) + + + ; + + + + create index edd_dsidix /* keyed first on dataset_presenter_id */ + on ExternalDbDatasetPresenter + (dataset_presenter_id, external_database_id, external_database_release_id, + external_database_name, dataset_presenter_name, dataset_presenter_display_name) + + + ; + + + + create index edd_dsnameix /* keyed first on dataset_presenter_name; the only one of the three that also carries external_database_version */ + on ExternalDbDatasetPresenter + (dataset_presenter_name, dataset_presenter_id, external_database_id, + external_database_release_id, external_database_name, external_database_version, + dataset_presenter_display_name) + + + ; + diff --git a/Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql b/Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql new file mode 100644 index 0000000000..1dd9673b23 --- /dev/null +++ b/Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql @@ -0,0 +1,60 @@ + + + CREATE TABLE ExternalSequenceTaxonRank AS + WITH organism_rank AS ( /* organism_rank: (seed taxon, ancestor scientific name, ancestor taxon_id, rank) for the eight ranks listed in the filter */ + SELECT tn1.taxon_id as organism, tn2.name as parent_organism, + tn2.taxon_id as parent_organism_id, r.rank + FROM sres.TaxonName tn1, sres.TaxonName tn2, + ( + WITH RECURSIVE cte AS ( /* walks from each seed taxon up through parent_id; input carries the seed taxon_id so every ancestor row stays tied to its starting taxon */ + SELECT taxon_id as input, taxon_id, rank, parent_id + FROM sres.taxon + WHERE taxon_id IN ( /* seeds: taxa referenced by dots.externalaasequence or apidb.taxonstring */ + SELECT taxon_id FROM dots.externalaasequence + UNION + SELECT taxon_id FROM apidb.taxonstring + ) + UNION + SELECT cte.input, t.taxon_id, t.rank, t.parent_id + FROM sres.taxon t, cte + WHERE cte.parent_id = t.taxon_id + ) + SELECT input, taxon_id, rank + FROM cte + ) r + WHERE r.input = tn1.taxon_id + and r.taxon_id = tn2.taxon_id + and tn1.name_class = 'scientific name' + and tn2.name_class = 'scientific name' + and r.rank in ('phylum', 'family','genus', 'species', 'superkingdom','kingdom', 'class', 'order') + ) + SELECT organisms.organism, + coalesce(superkingdom.parent_organism, 'N/A') as superkingdom, +
superkingdom.parent_organism_id as superkingdom_id, + coalesce(kingdom.parent_organism, 'N/A') as kingdom, + kingdom.parent_organism_id as kingdom_id, + coalesce (phylum.parent_organism, 'N/A') as phylum, + phylum.parent_organism_id as phylum_id, + coalesce (class.parent_organism, 'N/A') as class, + class.parent_organism_id as class_id, + coalesce (family.parent_organism, 'N/A') as family, + family.parent_organism_id as family_id, + coalesce (rank_order.parent_organism, 'N/A') as rank_order, + rank_order.parent_organism_id as rank_order_id, + coalesce ( genus.parent_organism, 'N/A') as genus, + genus.parent_organism_id as genus_id, + coalesce(species.parent_organism, 'N/A') as species, + species.parent_organism_id as species_id + FROM (SELECT DISTINCT organism FROM organism_rank) organisms /* one LEFT JOIN per rank below pivots organism_rank rows into columns; a missing rank gives 'N/A' for the name and NULL for the id */ + LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'phylum') phylum ON organisms.organism = phylum.organism + LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'genus') genus ON organisms.organism = genus.organism + LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'species') species ON organisms.organism = species.organism + LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'kingdom') kingdom ON organisms.organism = kingdom.organism + LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'superkingdom') superkingdom ON organisms.organism = superkingdom.organism + LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'class') class ON organisms.organism = class.organism + LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'family') family ON organisms.organism = family.organism + LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'order') rank_order ON organisms.organism = rank_order.organism /* aliased rank_order, not order */ + ORDER BY organism, species, genus + + ; + diff --git a/Model/lib/psql/webtables/??/GeneGroupProfile.psql b/Model/lib/psql/webtables/??/GeneGroupProfile.psql new file mode 100644 index 0000000000..e91fd35148 --- /dev/null +++ b/Model/lib/psql/webtables/??/GeneGroupProfile.psql @@ -0,0 +1,30
@@
+ -- GeneGroupProfile: pairs each gene with the profile datasets of its syntenic,
+ -- same-species orthologs (first branch) and with its own profiles (second).
+ create table GeneGroupProfile as
+ select distinct other_gene.source_id, p.dataset_name,
+        this_gene.source_id as profile_graph_id
+ from OrthologousTranscripts ot
+ inner join Profile p
+   on p.source_id = ot.source_id
+ inner join GeneAttributes this_gene
+   on ot.source_id = this_gene.source_id
+ inner join GeneAttributes other_gene
+   on ot.ortho_gene_source_id = other_gene.source_id
+  and this_gene.species = other_gene.species
+ where ot.is_syntenic = 1
+ union
+ select ga.source_id, p.dataset_name, p.source_id as profile_graph_id
+ from Profile p
+ inner join GeneAttributes ga on p.source_id = ga.source_id
+
+ ;
+
+
+
+ create index ggp_ix
+ on GeneGroupProfile
+ (source_id, dataset_name, profile_graph_id)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/GenomicSeqAttributes.psql b/Model/lib/psql/webtables/??/GenomicSeqAttributes.psql
new file mode 100644
index 0000000000..6922acea92
--- /dev/null
+++ b/Model/lib/psql/webtables/??/GenomicSeqAttributes.psql
@@ -0,0 +1,122 @@
+ -- One row per genomic sequence (external or virtual) of the selected taxon,
+ -- with base composition, organism, release and top-level/MSA flags.
+ CREATE TABLE :ORG_ABBREVGenomicSeqAttributes AS
+ SELECT
+   cast(apidb.prefixed_project_id(tn.name, ':ORG_ABBREV') as varchar(20)) AS project_id,
+   SUBSTR(sequence.source_id, 1, 60) AS source_id,
+   sequence.a_count,
+   sequence.c_count,
+   sequence.g_count,
+   sequence.t_count,
+   (sequence.length
+     - (sequence.a_count + sequence.c_count + sequence.g_count + sequence.t_count))
+     AS other_count,
+   sequence.length,
+   -- NULLIF turns a zero-length sequence into a NULL percentage rather than a
+   -- division-by-zero error; the numeric cast rules out integer division.
+   to_char((sequence.a_count + sequence.t_count)::numeric
+           / NULLIF(sequence.length, 0) * 100, '99.99')
+     AS at_percent,
+   SUBSTR(tn.name, 1, 100) AS organism,
+   taxon.ncbi_tax_id,
+   taxon.taxon_id,
+   CASE WHEN sequence.description IS NULL THEN SUBSTR(tn.name, 1, 100)
+        ELSE SUBSTR(sequence.description, 1, 400)
+   END AS sequence_description,
+   SUBSTR(genbank.genbank_accession, 1, 20) AS genbank_accession,
+   SUBSTR(db.database_version, 1, 30) AS database_version,
+   db.database_name,
+   SUBSTR(sequence.chromosome, 1, 20) AS chromosome,
+   sequence.external_database_release_id,
+   sequence.sequence_ontology_id,
+   sequence.chromosome_order_num,
+   so.source_id AS so_id,
+   so.name AS sequence_type,
+   -- 0 when the sequence is listed as a piece in SequencePieceClosure, else 1
+   coalesce(virtualization.is_top_level, 1) AS is_top_level,
+   sequence.na_sequence_id,
+   org.genome_source,
+   org.name_for_filenames,
+   coalesce(msa.has_msa, 0) AS has_msa
+ FROM
+   ( SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description,
+            a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id
+     FROM dots.ExternalNaSequence
+     UNION
+     SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description,
+            a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id
+     FROM dots.VirtualSequence
+   ) sequence
+   INNER JOIN sres.Taxon taxon
+     ON sequence.taxon_id = taxon.taxon_id
+   INNER JOIN sres.OntologyTerm so
+     ON sequence.sequence_ontology_id = so.ontology_term_id
+   LEFT JOIN apidb.Organism org
+     ON taxon.taxon_id = org.taxon_id
+   LEFT JOIN
+   ( -- one GenBank accession per sequence (the greatest, if there are several)
+     SELECT drns.na_sequence_id, max(dr.primary_identifier) AS genbank_accession
+     FROM dots.dbrefNaSequence drns
+     INNER JOIN sres.DbRef dr
+       ON drns.db_ref_id = dr.db_ref_id
+     INNER JOIN sres.ExternalDatabaseRelease gb_edr
+       ON dr.external_database_release_id = gb_edr.external_database_release_id
+     INNER JOIN sres.ExternalDatabase gb_ed
+       ON gb_edr.external_database_id = gb_ed.external_database_id
+     WHERE gb_ed.name = 'GenBank'
+     GROUP BY drns.na_sequence_id
+   ) genbank ON sequence.na_sequence_id = genbank.na_sequence_id
+   LEFT JOIN
+   ( SELECT edr.external_database_release_id,
+            edr.version AS database_version, ed.name AS database_name
+     FROM sres.ExternalDatabase ed
+     INNER JOIN sres.ExternalDatabaseRelease edr
+       ON edr.external_database_id = ed.external_database_id
+   ) db ON sequence.external_database_release_id = db.external_database_release_id
+   LEFT JOIN
+   ( SELECT distinct piece_na_sequence_id, 0 AS is_top_level
+     FROM :ORG_ABBREVSequencePieceClosure
+   ) virtualization ON sequence.na_sequence_id = virtualization.piece_na_sequence_id
+   LEFT JOIN
+   ( SELECT a_na_sequence_id AS na_sequence_id, 1 AS has_msa
+     FROM apidb.Synteny syn
+     GROUP BY a_na_sequence_id
+   ) msa ON sequence.na_sequence_id = msa.na_sequence_id
+   LEFT JOIN
+   ( SELECT taxon_id, max(name) AS name
+     FROM sres.TaxonName
+     WHERE name_class = 'scientific name'
+     GROUP BY taxon_id
+   ) tn ON sequence.taxon_id = tn.taxon_id
+ WHERE
+   -- an empty filter value disables the taxon restriction
+   (sequence.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
+   AND so.name IN ('random_sequence', 'chromosome', 'contig', 'supercontig','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle')
+ ORDER BY organism, source_id
+
+ ;
+
+
+ -- index names carry the same organism prefix as the table so that builds for different organisms do not collide
+ create unique index :ORG_ABBREVpk_SeqAttr ON :ORG_ABBREVGenomicSeqAttributes (lower(source_id), project_id)
+
+ ;
+
+
+
+ create unique index :ORG_ABBREVSeqAttr_source_id ON :ORG_ABBREVGenomicSeqAttributes (source_id)
+
+ ;
+
+
+
+ create unique index :ORG_ABBREVSeqAttr_naseqid ON :ORG_ABBREVGenomicSeqAttributes (na_sequence_id)
+
+ ;
+
+
+
+ create unique index :ORG_ABBREVSeqAttr_taxsrc_id ON :ORG_ABBREVGenomicSeqAttributes (taxon_id, source_id)
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql b/Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql
new file mode 100644
index 0000000000..a7d484850c
--- /dev/null
+++ b/Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql
@@ -0,0 +1,20 @@
+ -- GroupPhylogeneticProfile: one profile string per orthomcl_name, taken from
+ -- the gene with the greatest source_id in that group.
+ CREATE table GroupPhylogeneticProfile as
+ SELECT rep.orthomcl_name, pp.profile_string
+ FROM apidb.PhylogeneticProfile pp
+ INNER JOIN (SELECT orthomcl_name, max(source_id) as source_id
+             FROM GeneAttributes
+             GROUP BY orthomcl_name) rep
+   ON rep.source_id = pp.source_id
+
+ ;
+
+
+
+ create index group_pp_ix
+ on GroupPhylogeneticProfile (orthomcl_name)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/OrthologousTranscripts.psql b/Model/lib/psql/webtables/??/OrthologousTranscripts.psql
new file mode 100644
index 0000000000..d6e7191d1e
--- /dev/null
+++ b/Model/lib/psql/webtables/??/OrthologousTranscripts.psql
@@ -0,0 +1,73 @@
+ -- SyntenicPairs: helper table of (gene, syntenic gene) pairs, where a
+ -- SyntenicGene span overlaps the gene's span on the same sequence.
+ create UNLOGGED table SyntenicPairs as
+ select distinct ga.na_feature_id, sg.syn_na_feature_id
+ from apidb.SyntenicGene sg
+ inner join GeneAttributes ga on sg.na_sequence_id = ga.na_sequence_id
+ where sg.end_max >= ga.start_min
+   and sg.start_min <= ga.end_max
+
+ ;
+
+
+
+ create index SynPair_idx
+ on SyntenicPairs (na_feature_id, syn_na_feature_id)
+
+
+ ;
+
+ -- OrthologousTranscripts: every gene paired with each transcript sharing its
+ -- ORTHOMCL_NAME, flagged is_syntenic = 1 when the pair is in SyntenicPairs.
+ create table OrthologousTranscripts as
+ with all_pairs
+ as (select ga.source_id
+          , ga.project_id
+          , ga.na_feature_id
+          , ota.source_id as ortho_source_id
+          , ota.gene_source_id as ortho_gene_source_id
+          , ota.project_id as ortho_project_id
+          , ota.gene_na_feature_id as ortho_na_feature_id
+          , ota.transcript_product as ortho_product
+          , ota.protein_length
+          , ga.name as ortho_name -- NOTE(review): read from the gene (ga), not the ortholog (ota); confirm intended
+          , ota.organism as ortho_organism
+          , ota.taxon_id as ortho_taxon_id
+          , o.is_reference_strain
+     from Geneattributes ga
+     inner join TranscriptAttributes ota
+       on ga.ORTHOMCL_NAME = ota.ORTHOMCL_NAME
+     inner join apidb.Organism o
+       on ota.taxon_id = o.taxon_id
+    ),
+ syn_pairs
+ as (select na_feature_id, syn_na_feature_id, 1 as is_syntenic from SyntenicPairs
+    )
+ select all_pairs.*
+      , coalesce(syn_pairs.is_syntenic, 0) as is_syntenic
+ from all_pairs
+ left join syn_pairs
+   on all_pairs.na_feature_id = syn_pairs.na_feature_id
+  and all_pairs.ortho_na_feature_id = syn_pairs.syn_na_feature_id
+
+ ;
+
+
+
+ create index ot_idx
+ on OrthologousTranscripts (source_id, project_id, is_syntenic desc, ortho_source_id,
+                            ortho_project_id, ortho_gene_source_id, ortho_product,
+                            ortho_name, ortho_organism, ortho_taxon_id, is_reference_strain)
+
+
+ ;
+
+
+
+ create index ot_smol_idx
+ on OrthologousTranscripts (is_syntenic, ortho_taxon_id, source_id, ortho_source_id,
+                            ortho_project_id, ortho_gene_source_id)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/PANExtDbRls.psql b/Model/lib/psql/webtables/??/PANExtDbRls.psql
new file mode 100644
index 0000000000..82b469f127
--- /dev/null
+++ b/Model/lib/psql/webtables/??/PANExtDbRls.psql
@@ -0,0 +1,36 @@
+
+
+ CREATE TABLE
:ORG_ABBREVPANExtDbRls AS
+ SELECT DISTINCT protocol_app_node_id AS pan_id, external_database_release_id, name AS dataset_name
+ FROM (
+     -- protocol app nodes reached through a node set; the release and
+     -- database name come from the node set's external database release
+     SELECT
+         sl.protocol_app_node_id,
+         s.external_database_release_id,
+         d.name
+     FROM study.nodeSet s
+     INNER JOIN study.nodeNodeSet sl
+         ON s.node_set_id = sl.node_set_id
+     INNER JOIN sres.externaldatabaserelease r
+         ON s.external_database_release_id = r.external_database_release_id
+     INNER JOIN sres.externaldatabase d
+         ON r.external_database_id = d.external_database_id
+     WHERE s.external_database_release_id IS NOT NULL
+     UNION
+     SELECT
+         pan.protocol_app_node_id,
+         pan.external_database_release_id,
+         d.name
+     FROM study.protocolappnode pan
+     INNER JOIN sres.externaldatabaserelease r
+         ON pan.external_database_release_id = r.external_database_release_id
+     INNER JOIN sres.externaldatabase d
+         ON r.external_database_id = d.external_database_id
+     WHERE pan.external_database_release_id IS NOT NULL
+ ) t
+ WHERE (name = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0) -- NOTE(review): the filter value is matched against the dataset name here; confirm
+ ORDER BY external_database_release_id, protocol_app_node_id
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/PANIO.psql b/Model/lib/psql/webtables/??/PANIO.psql
new file mode 100644
index 0000000000..fa820f9d4b
--- /dev/null
+++ b/Model/lib/psql/webtables/??/PANIO.psql
@@ -0,0 +1,70 @@
+ -- PANIO: input node / protocol application / output node triples with the
+ -- ontology type of both nodes, kept when the input node is in PANExtDbRls.
+ CREATE TABLE :ORG_ABBREVPANIO AS
+ SELECT DISTINCT io.*
+ FROM (
+     SELECT
+         i.protocol_app_node_id     AS input_pan_id,
+         pa.protocol_app_id,
+         o.protocol_app_node_id     AS output_pan_id,
+         in_type.source_id          AS input_pan_type_source_id,
+         --in_type.name as input_pan_type,
+         in_type.ontology_term_id   AS input_pan_type_id,
+         out_type.source_id         AS output_pan_type_source_id,
+         --out_type.name as output_pan_type,
+         out_type.ontology_term_id  AS output_pan_type_id
+     FROM study.ProtocolApp pa
+     INNER JOIN study.Input i
+         ON i.protocol_app_id = pa.protocol_app_id
+     INNER JOIN study.Output o
+         ON o.protocol_app_id = pa.protocol_app_id
+     INNER JOIN study.ProtocolAppNode in_pan
+         ON i.protocol_app_node_id = in_pan.protocol_app_node_id
+     INNER JOIN study.ProtocolAppNode out_pan
+         ON o.protocol_app_node_id = out_pan.protocol_app_node_id
+     -- outer joins: a node without a matching type keeps NULL type columns
+     LEFT JOIN sres.OntologyTerm in_type
+         ON in_pan.type_id = in_type.ontology_term_id
+     LEFT JOIN sres.OntologyTerm out_type
+         ON out_pan.type_id = out_type.ontology_term_id
+ ) io
+ INNER JOIN :ORG_ABBREVpanextdbrls panExtDbRls
+     -- the input and outputs will have same dataset in prefix enabled mode only
+     ON io.input_pan_id = panExtDbRls.pan_id
+ WHERE (panExtDbRls.dataset_name = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0)
+ ORDER BY io.input_pan_id, io.output_pan_id
+
+ ;
+
+
+
+ CREATE INDEX :ORG_ABBREVpainio2_iix ON :ORG_ABBREVPANIO
+     (input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
+
+
+ ;
+
+
+
+ CREATE INDEX :ORG_ABBREVpainio2_oix ON :ORG_ABBREVPANIO
+     (output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id)
+
+
+ ;
+
+
+
+ CREATE INDEX :ORG_ABBREVpainio2_otypeix ON :ORG_ABBREVPANIO
+     (output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id)
+
+
+ ;
+
+
+
+ CREATE INDEX :ORG_ABBREVpainio2_itypeix ON :ORG_ABBREVPANIO
+     (input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id)
+
+
+ ;
+
diff --git a/Model/lib/psql/webtables/??/PANResults.psql b/Model/lib/psql/webtables/??/PANResults.psql
new file mode 100644
index 0000000000..de67af4813
--- /dev/null
+++ b/Model/lib/psql/webtables/??/PANResults.psql
@@ -0,0 +1,113 @@
+ -- PANResults: labels each protocol app node with every result table that
+ -- holds rows for it; one UNION branch per result table.
+ CREATE TABLE :ORG_ABBREVPANResults AS
+ SELECT DISTINCT r.*
+ FROM (
+     SELECT protocol_app_node_id AS pan_id, 'Results::NAFeatureDiffResult' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.NAFeatureDiffResult)
+     UNION
+     SELECT protocol_app_node_id, 'Results::ReporterIntensity' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.ReporterIntensity)
+     UNION
+     SELECT protocol_app_node_id, 'Results::SegmentResult' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.SegmentResult)
+     UNION
+     SELECT protocol_app_node_id, 'Results::CompoundMassSpec' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.CompoundMassSpec)
+     UNION
+     SELECT protocol_app_node_id, 'Results::NaFeatureHostResponse' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.NaFeatureHostResponse)
+     UNION
+     SELECT protocol_app_node_id, 'ApiDB::ChrCopyNumber' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM ApiDB.ChrCopyNumber)
+     UNION
+     SELECT protocol_app_node_id, 'ApiDB::GeneCopyNumber' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM ApiDB.GeneCopyNumber)
+     UNION
+     SELECT protocol_app_node_id, 'Results::NAFeatureExpression' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.NAFeatureExpression)
+     UNION
+     SELECT protocol_app_node_id, 'Results::EditingEvent' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.EditingEvent)
+     UNION
+     SELECT protocol_app_node_id, 'Results::FamilyDiffResult' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.FamilyDiffResult)
+     UNION
+     SELECT protocol_app_node_id, 'Results::FamilyExpression' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.FamilyExpression)
+     UNION
+     SELECT protocol_app_node_id, 'Results::GeneDiffResult' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.GeneDiffResult)
+     UNION
+     SELECT protocol_app_node_id, 'Results::GeneExpression' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.GeneExpression)
+     UNION
+     SELECT protocol_app_node_id, 'Results::GeneSimilarity' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.GeneSimilarity)
+     UNION
+     SELECT protocol_app_node_id, 'Results::ReporterDiffResult' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.ReporterDiffResult)
+     UNION
+     SELECT protocol_app_node_id, 'Results::ReporterExpression' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.ReporterExpression)
+     UNION
+     SELECT protocol_app_node_id, 'Results::RnaDiffResult' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.RnaDiffResult)
+     UNION
+     SELECT protocol_app_node_id, 'Results::RnaExpression' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.RnaExpression)
+     UNION
+     SELECT protocol_app_node_id, 'Results::LineageAbundance' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.LineageAbundance)
+     UNION
+     SELECT protocol_app_node_id, 'Results::SegmentDiffResult' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.SegmentDiffResult)
+     UNION
+     SELECT protocol_app_node_id, 'Results::SeqVariation' AS result_table
+     FROM study.ProtocolAppNode
+     WHERE protocol_app_node_id IN (SELECT protocol_app_node_id FROM Results.SeqVariation)
+     UNION
+     SELECT protocol_app_node_id, 'ApiDB::SequenceVariation' AS result_table
+     FROM study.ProtocolAppNode
+ WHERE protocol_app_node_id in (select protocol_app_node_id FROM study.ProtocolAppNode WHERE name like '%
(Sequence Variation)') + UNION + SELECT protocol_app_node_id, 'ApiDB::MassSpecSummary' as result_table from study.ProtocolAppNode + WHERE protocol_app_node_id in (select protocol_app_node_id FROM apidb.MASSSPECSUMMARY) + UNION + SELECT protocol_app_node_id, 'ApiDB::IntronJunction' as result_table from study.ProtocolAppNode + WHERE protocol_app_node_id in (select protocol_app_node_id FROM apidb.IntronJunction) + UNION + SELECT protocol_app_node_id, 'ApiDB::RflpGenotype' as result_table from study.ProtocolAppNode + WHERE protocol_app_node_id in (select protocol_app_node_id FROM apidb.RflpGenotype) + UNION + SELECT protocol_app_node_id, 'ApiDB::RflpGenotypeNumber' as result_table from study.ProtocolAppNode + WHERE protocol_app_node_id in (select protocol_app_node_id FROM apidb.RflpGenotypeNumber) + UNION + SELECT protocol_app_node_id, 'ApiDB::CrisprPhenotype' as result_table from study.ProtocolAppNode + WHERE protocol_app_node_id in (select protocol_app_node_id FROM apidb.crisprphenotype) + ) r, :ORG_ABBREVpanextdbrls panExtDbRls + WHERE r.pan_id = panExtDbRls.pan_id + AND (panExtDbRls.dataset_name = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0) + + ; + diff --git a/Model/lib/psql/webtables/??/PhyleticPattern.psql b/Model/lib/psql/webtables/??/PhyleticPattern.psql new file mode 100644 index 0000000000..6b5f20ca3c --- /dev/null +++ b/Model/lib/psql/webtables/??/PhyleticPattern.psql @@ -0,0 +1,395 @@ + + +CREATE TABLE PhyleticPattern AS + (SELECT actual.group_name, + actual.alveolata as alveolata_actual, + total.alveolata as alveolata_total, + round(100*actual.alveolata/total.alveolata,0) AS alveolata_percent, + actual.archaea as archaea_actual, + total.archaea as archaea_total, + round(100*actual.archaea/total.archaea,0) AS archaea_percent, + actual.amoeba as amoeba_actual, + total.amoeba as amoeba_total, + round(100*actual.amoeba/total.amoeba,0) AS amoeba_percent, + actual.bacteria as bacteria_actual, + total.bacteria as bacteria_total, + 
round(100*actual.bacteria/total.bacteria,0) AS bacteria_percent, + actual.fungi as fungi_actual, + total.fungi as fungi_total, + round(100*actual.fungi/total.fungi,0) AS fungi_percent, + actual.euglenozoa as euglenozoa_actual, + total.euglenozoa as euglenozoa_total, + round(100*actual.euglenozoa/total.euglenozoa,0) AS euglenozoa_percent, + actual.metazoa as metazoa_actual, + total.metazoa as metazoa_total, + round(100*actual.metazoa/total.metazoa,0) AS metazoa_percent, + actual.viridiplantae as viridiplantae_actual, + total.viridiplantae as viridiplantae_total, + round(100*actual.viridiplantae/total.viridiplantae,0) AS viridiplantae_percent, + actual.other_eukaryotes as other_eukaryotes_actual, + total.other_eukaryotes as other_eukaryotes_total, + round(100*actual.other_eukaryotes/total.other_eukaryotes,0) AS other_eukaryotes_percent, + actual.bacteria_firm as bacteria_firm_actual, + total.bacteria_firm as bacteria_firm_total, + actual.bacteria_proa as bacteria_proa_actual, + total.bacteria_proa as bacteria_proa_total, + actual.bacteria_prob as bacteria_prob_actual, + total.bacteria_prob as bacteria_prob_total, + actual.bacteria_prod as bacteria_prod_actual, + total.bacteria_prod as bacteria_prod_total, + actual.bacteria_proe as bacteria_proe_actual, + total.bacteria_proe as bacteria_proe_total, + actual.bacteria_prog as bacteria_prog_actual, + total.bacteria_prog as bacteria_prog_total, + actual.bacteria_obac as bacteria_obac_actual, + total.bacteria_obac as bacteria_obac_total, + actual.archaea_arch as archaea_arch_actual, + total.archaea_arch as archaea_arch_total, + actual.archaea_eury as archaea_eury_actual, + total.archaea_eury as archaea_eury_total, + actual.archaea_cren as archaea_cren_actual, + total.archaea_cren as archaea_cren_total, + actual.archaea_nano as archaea_nano_actual, + total.archaea_nano as archaea_nano_total, + actual.archaea_kora as archaea_kora_actual, + total.archaea_kora as archaea_kora_total, + --oeuk_genera.html as oeuk_genera_html, + 
--eugl_genera.html as eugl_genera_html, + --amoe_genera.html as amoe_genera_html, + actual.alveolata_alve as alveolata_alve_actual, + total.alveolata_alve as alveolata_alve_total, + actual.alveolata_cili as alveolata_cili_actual, + total.alveolata_cili as alveolata_cili_total, + actual.alveolata_apic as alveolata_apic_actual, + total.alveolata_apic as alveolata_apic_total, + actual.alveolata_cocc as alveolata_cocc_actual, + total.alveolata_cocc as alveolata_cocc_total, + actual.alveolata_haem as alveolata_haem_actual, + total.alveolata_haem as alveolata_haem_total, + actual.alveolata_piro as alveolata_piro_actual, + total.alveolata_piro as alveolata_piro_total, + actual.viridiplantae_stre as viridiplantae_stre_actual, + total.viridiplantae_stre as viridiplantae_stre_total, + actual.viridiplantae_chlo as viridiplantae_chlo_actual, + total.viridiplantae_chlo as viridiplantae_chlo_total, + actual.viridiplantae_rhod as viridiplantae_rhod_actual, + total.viridiplantae_rhod as viridiplantae_rhod_total, + actual.viridiplantae_cryp as viridiplantae_cryp_actual, + total.viridiplantae_cryp as viridiplantae_cryp_total, + actual.fungi_fung as fungi_fung_actual, + total.fungi_fung as fungi_fung_total, + actual.fungi_micr as fungi_micr_actual, + total.fungi_micr as fungi_micr_total, + actual.fungi_basi as fungi_basi_actual, + total.fungi_basi as fungi_basi_total, + actual.fungi_asco as fungi_asco_actual, + total.fungi_asco as fungi_asco_total, + actual.fungi_muco as fungi_muco_actual, + total.fungi_muco as fungi_muco_total, + actual.fungi_chyt as fungi_chyt_actual, + total.fungi_chyt as fungi_chyt_total, + actual.metazoa_omet as metazoa_omet_actual, + total.metazoa_omet as metazoa_omet_total, + actual.metazoa_nema as metazoa_nema_actual, + total.metazoa_nema as metazoa_nema_total, + actual.metazoa_arth as metazoa_arth_actual, + total.metazoa_arth as metazoa_arth_total, + actual.metazoa_chor as metazoa_chor_actual, + total.metazoa_chor as metazoa_chor_total, + actual.metazoa_acti 
as metazoa_acti_actual, + total.metazoa_acti as metazoa_acti_total, + actual.metazoa_aves as metazoa_aves_actual, + total.metazoa_aves as metazoa_aves_total, + actual.metazoa_mamm as metazoa_mamm_actual, + total.metazoa_mamm as metazoa_mamm_total, + actual.metazoa_tuni as metazoa_tuni_actual, + total.metazoa_tuni as metazoa_tuni_total + FROM + (SELECT SUM(CASE clade WHEN 'alveolata' THEN num ELSE 0 END) as alveolata, + SUM(CASE clade WHEN 'archaea' THEN num ELSE 0 END) as archaea, + SUM(CASE clade WHEN 'amoeba' THEN num ELSE 0 END) as amoeba, + SUM(CASE clade WHEN 'bacteria' THEN num ELSE 0 END) as bacteria, + SUM(CASE clade WHEN 'fungi' THEN num ELSE 0 END) as fungi, + SUM(CASE clade WHEN 'euglenozoa' THEN num ELSE 0 END) as euglenozoa, + SUM(CASE clade WHEN 'metazoa' THEN num ELSE 0 END) as metazoa, + SUM(CASE clade WHEN 'viridiplantae' THEN num ELSE 0 END) as viridiplantae, + SUM(CASE clade WHEN 'other_eukaryotes' THEN num ELSE 0 END) as other_eukaryotes, + SUM(CASE clade WHEN 'FIRM' THEN num ELSE 0 END) as bacteria_firm, + SUM(CASE clade WHEN 'PROA' THEN num ELSE 0 END) as bacteria_proa, + SUM(CASE clade WHEN 'PROB' THEN num ELSE 0 END) as bacteria_prob, + SUM(CASE clade WHEN 'PROD' THEN num ELSE 0 END) as bacteria_prod, + SUM(CASE clade WHEN 'PROE' THEN num ELSE 0 END) as bacteria_proe, + SUM(CASE clade WHEN 'PROG' THEN num ELSE 0 END) as bacteria_prog, + SUM(CASE clade WHEN 'OBAC' THEN num ELSE 0 END) as bacteria_obac, + SUM(CASE clade WHEN 'ARCH' THEN num ELSE 0 END) as archaea_arch, + SUM(CASE clade WHEN 'EURY' THEN num ELSE 0 END) as archaea_eury, + SUM(CASE clade WHEN 'CREN' THEN num ELSE 0 END) as archaea_cren, + SUM(CASE clade WHEN 'NANO' THEN num ELSE 0 END) as archaea_nano, + SUM(CASE clade WHEN 'KORA' THEN num ELSE 0 END) as archaea_kora, + SUM(CASE clade WHEN 'ALVE' THEN num ELSE 0 END) as alveolata_alve, + SUM(CASE clade WHEN 'CILI' THEN num ELSE 0 END) as alveolata_cili, + SUM(CASE clade WHEN 'APIC' THEN num ELSE 0 END) as alveolata_apic, + 
SUM(CASE clade WHEN 'COCC' THEN num ELSE 0 END) as alveolata_cocc, + SUM(CASE clade WHEN 'HAEM' THEN num ELSE 0 END) as alveolata_haem, + SUM(CASE clade WHEN 'PIRO' THEN num ELSE 0 END) as alveolata_piro, + SUM(CASE clade WHEN 'STRE' THEN num ELSE 0 END) as viridiplantae_stre, + SUM(CASE clade WHEN 'CHLO' THEN num ELSE 0 END) as viridiplantae_chlo, + SUM(CASE clade WHEN 'RHOD' THEN num ELSE 0 END) as viridiplantae_rhod, + SUM(CASE clade WHEN 'CRYP' THEN num ELSE 0 END) as viridiplantae_cryp, + SUM(CASE clade WHEN 'FUNG' THEN num ELSE 0 END) as fungi_fung, + SUM(CASE clade WHEN 'MICR' THEN num ELSE 0 END) as fungi_micr, + SUM(CASE clade WHEN 'BASI' THEN num ELSE 0 END) as fungi_basi, + SUM(CASE clade WHEN 'ASCO' THEN num ELSE 0 END) as fungi_asco, + SUM(CASE clade WHEN 'MUCO' THEN num ELSE 0 END) as fungi_muco, + SUM(CASE clade WHEN 'CHYT' THEN num ELSE 0 END) as fungi_chyt, + SUM(CASE clade WHEN 'OMET' THEN num ELSE 0 END) as metazoa_omet, + SUM(CASE clade WHEN 'NEMA' THEN num ELSE 0 END) as metazoa_nema, + SUM(CASE clade WHEN 'ARTH' THEN num ELSE 0 END) as metazoa_arth, + SUM(CASE clade WHEN 'CHOR' THEN num ELSE 0 END) as metazoa_chor, + SUM(CASE clade WHEN 'ACTI' THEN num ELSE 0 END) as metazoa_acti, + SUM(CASE clade WHEN 'AVES' THEN num ELSE 0 END) as metazoa_aves, + SUM(CASE clade WHEN 'MAMM' THEN num ELSE 0 END) as metazoa_mamm, + SUM(CASE clade WHEN 'TUNI' THEN num ELSE 0 END) as metazoa_tuni + FROM ( + (WITH RECURSIVE TaxonHierarchy AS ( + SELECT orthomcl_clade_id, parent_id + FROM apidb.OrthomclClade + WHERE orthomcl_clade_id IN ( + SELECT orthomcl_clade_id + FROM apidb.OrthomclClade + WHERE three_letter_abbrev = 'BACT' + ) UNION ALL + SELECT child.orthomcl_clade_id,child.parent_id + FROM apidb.OrthomclClade child + JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id + ) + SELECT 'bacteria' AS clade, COUNT(*) AS num + FROM TaxonHierarchy th + JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id + WHERE 
c.core_peripheral IN ('C', 'P') + + ) UNION ( + WITH RECURSIVE TaxonHierarchy AS ( + SELECT orthomcl_clade_id, parent_id + FROM apidb.OrthomclClade + WHERE orthomcl_clade_id IN ( + SELECT orthomcl_clade_id + FROM apidb.OrthomclClade + WHERE three_letter_abbrev = 'ARCH' + ) UNION ALL + SELECT child.orthomcl_clade_id,child.parent_id + FROM apidb.OrthomclClade child + JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id + ) + SELECT 'archaea' AS clade, COUNT(*) AS num + FROM TaxonHierarchy th + JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id + WHERE c.core_peripheral IN ('C', 'P') + + ) UNION ( + WITH RECURSIVE TaxonHierarchy AS ( + SELECT orthomcl_clade_id, parent_id + FROM apidb.OrthomclClade + WHERE orthomcl_clade_id IN ( + SELECT orthomcl_clade_id + FROM apidb.OrthomclClade + WHERE three_letter_abbrev = 'ALVE' + ) UNION ALL + SELECT child.orthomcl_clade_id,child.parent_id + FROM apidb.OrthomclClade child + JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id + ) + SELECT 'alveolata' AS clade, COUNT(*) AS num + FROM TaxonHierarchy th + JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id + WHERE c.core_peripheral IN ('C', 'P') + ) UNION ( + WITH RECURSIVE TaxonHierarchy AS ( + SELECT orthomcl_clade_id, parent_id + FROM apidb.OrthomclClade + WHERE orthomcl_clade_id IN ( + SELECT orthomcl_clade_id + FROM apidb.OrthomclClade + WHERE three_letter_abbrev = 'AMOE' + ) UNION ALL + SELECT child.orthomcl_clade_id,child.parent_id + FROM apidb.OrthomclClade child + JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id + ) + SELECT 'amoeba' AS clade, COUNT(*) AS num + FROM TaxonHierarchy th + JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id + WHERE c.core_peripheral IN ('C', 'P') + + ) UNION ( + WITH RECURSIVE TaxonHierarchy AS ( + SELECT orthomcl_clade_id, parent_id + FROM apidb.OrthomclClade + WHERE orthomcl_clade_id IN ( + SELECT orthomcl_clade_id 
+ FROM apidb.OrthomclClade + WHERE three_letter_abbrev = 'EUGL' + ) UNION ALL + SELECT child.orthomcl_clade_id,child.parent_id + FROM apidb.OrthomclClade child + JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id + ) + SELECT 'euglenozoa' AS clade, COUNT(*) AS num + FROM TaxonHierarchy th + JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id + WHERE c.core_peripheral IN ('C', 'P') + + ) UNION ( + WITH RECURSIVE TaxonHierarchy AS ( + SELECT orthomcl_clade_id, parent_id + FROM apidb.OrthomclClade + WHERE orthomcl_clade_id IN ( + SELECT orthomcl_clade_id + FROM apidb.OrthomclClade + WHERE three_letter_abbrev = 'VIRI' + ) UNION ALL + SELECT child.orthomcl_clade_id,child.parent_id + FROM apidb.OrthomclClade child + JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id + ) + SELECT 'viridiplantae' AS clade, COUNT(*) AS num + FROM TaxonHierarchy th + JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id + WHERE c.core_peripheral IN ('C', 'P') + + ) UNION ( + WITH RECURSIVE TaxonHierarchy AS ( + SELECT orthomcl_clade_id, parent_id + FROM apidb.OrthomclClade + WHERE orthomcl_clade_id IN ( + SELECT orthomcl_clade_id + FROM apidb.OrthomclClade + WHERE three_letter_abbrev = 'FUNG' + ) UNION ALL + SELECT child.orthomcl_clade_id,child.parent_id + FROM apidb.OrthomclClade child + JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id + ) + SELECT 'fungi' AS clade, COUNT(*) AS num + FROM TaxonHierarchy th + JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id + WHERE c.core_peripheral IN ('C', 'P') + + ) UNION ( + WITH RECURSIVE TaxonHierarchy AS ( + SELECT orthomcl_clade_id, parent_id + FROM apidb.OrthomclClade + WHERE orthomcl_clade_id IN ( + SELECT orthomcl_clade_id + FROM apidb.OrthomclClade + WHERE three_letter_abbrev = 'META' + ) UNION ALL + SELECT child.orthomcl_clade_id,child.parent_id + FROM apidb.OrthomclClade child + JOIN TaxonHierarchy parent ON 
child.parent_id = parent.orthomcl_clade_id + ) + SELECT 'metazoa' AS clade, COUNT(*) AS num + FROM TaxonHierarchy th + JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id + WHERE c.core_peripheral IN ('C', 'P') + + ) UNION ( + WITH RECURSIVE TaxonHierarchy AS ( + SELECT orthomcl_clade_id, parent_id + FROM apidb.OrthomclClade + WHERE orthomcl_clade_id IN ( + SELECT orthomcl_clade_id + FROM apidb.OrthomclClade + WHERE three_letter_abbrev = 'OEUK' + ) UNION ALL + SELECT child.orthomcl_clade_id,child.parent_id + FROM apidb.OrthomclClade child + JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id + ) + SELECT 'other_eukaryotes' AS clade, COUNT(*) AS num + FROM TaxonHierarchy th + JOIN apidb.OrthomclClade c ON th.orthomcl_clade_id = c.orthomcl_clade_id + WHERE c.core_peripheral IN ('C', 'P') + + ) UNION ( + SELECT b.three_letter_abbrev AS clade,a.num + FROM (SELECT parent_id, COUNT(orthomcl_clade_id) AS num + FROM apidb.OrthomclClade + WHERE core_peripheral in ('C','P') + GROUP BY parent_id) a, + apidb.OrthomclClade b + WHERE a.parent_id = b.orthomcl_clade_id + ) + )) total, + (SELECT name as group_name, + SUM(CASE three_letter_abbrev WHEN 'ALVE' THEN number_of_taxa ELSE 0 END) as alveolata, + SUM(CASE three_letter_abbrev WHEN 'ARCH' THEN number_of_taxa ELSE 0 END) as archaea, + SUM(CASE three_letter_abbrev WHEN 'AMOE' THEN number_of_taxa ELSE 0 END) as amoeba, + SUM(CASE three_letter_abbrev WHEN 'BACT' THEN number_of_taxa ELSE 0 END) as bacteria, + SUM(CASE three_letter_abbrev WHEN 'FUNG' THEN number_of_taxa ELSE 0 END) as fungi, + SUM(CASE three_letter_abbrev WHEN 'EUGL' THEN number_of_taxa ELSE 0 END) as euglenozoa, + SUM(CASE three_letter_abbrev WHEN 'META' THEN number_of_taxa ELSE 0 END) as metazoa, + SUM(CASE three_letter_abbrev WHEN 'VIRI' THEN number_of_taxa ELSE 0 END) as viridiplantae, + SUM(CASE three_letter_abbrev WHEN 'OEUK' THEN number_of_taxa ELSE 0 END) as other_eukaryotes, + SUM(CASE three_letter_abbrev WHEN 
'FIRM' THEN number_of_taxa ELSE 0 END) as bacteria_firm, + SUM(CASE three_letter_abbrev WHEN 'PROA' THEN number_of_taxa ELSE 0 END) as bacteria_proa, + SUM(CASE three_letter_abbrev WHEN 'PROB' THEN number_of_taxa ELSE 0 END) as bacteria_prob, + SUM(CASE three_letter_abbrev WHEN 'PROD' THEN number_of_taxa ELSE 0 END) as bacteria_prod, + SUM(CASE three_letter_abbrev WHEN 'PROE' THEN number_of_taxa ELSE 0 END) as bacteria_proe, + SUM(CASE three_letter_abbrev WHEN 'PROG' THEN number_of_taxa ELSE 0 END) as bacteria_prog, + SUM(CASE three_letter_abbrev WHEN 'OBAC' THEN number_of_taxa ELSE 0 END) as bacteria_obac, + SUM(CASE three_letter_abbrev WHEN 'ARCH' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'EURY' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'CREN' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'NANO' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'KORA' THEN number_of_taxa ELSE 0 END) as archaea_arch, + SUM(CASE three_letter_abbrev WHEN 'EURY' THEN number_of_taxa ELSE 0 END) as archaea_eury, + SUM(CASE three_letter_abbrev WHEN 'CREN' THEN number_of_taxa ELSE 0 END) as archaea_cren, + SUM(CASE three_letter_abbrev WHEN 'NANO' THEN number_of_taxa ELSE 0 END) as archaea_nano, + SUM(CASE three_letter_abbrev WHEN 'KORA' THEN number_of_taxa ELSE 0 END) as archaea_kora, + SUM(CASE three_letter_abbrev WHEN 'ALVE' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'CILI' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'APIC' THEN number_of_taxa ELSE 0 END) as alveolata_alve, + SUM(CASE three_letter_abbrev WHEN 'CILI' THEN number_of_taxa ELSE 0 END) as alveolata_cili, + SUM(CASE three_letter_abbrev WHEN 'APIC' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'COCC' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'ACON' THEN number_of_taxa ELSE 0 END) as alveolata_apic, + SUM(CASE 
three_letter_abbrev WHEN 'COCC' THEN number_of_taxa ELSE 0 END) as alveolata_cocc, + SUM(CASE three_letter_abbrev WHEN 'HAEM' THEN number_of_taxa ELSE 0 END) as alveolata_haem, + SUM(CASE three_letter_abbrev WHEN 'PIRO' THEN number_of_taxa ELSE 0 END) as alveolata_piro, + SUM(CASE three_letter_abbrev WHEN 'STRE' THEN number_of_taxa ELSE 0 END) as viridiplantae_stre, + SUM(CASE three_letter_abbrev WHEN 'CHLO' THEN number_of_taxa ELSE 0 END) as viridiplantae_chlo, + SUM(CASE three_letter_abbrev WHEN 'RHOD' THEN number_of_taxa ELSE 0 END) as viridiplantae_rhod, + SUM(CASE three_letter_abbrev WHEN 'CRYP' THEN number_of_taxa ELSE 0 END) as viridiplantae_cryp, + SUM(CASE three_letter_abbrev WHEN 'FUNG' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'MICR' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'BASI' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'ASCO' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'MUCO' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'CHYT' THEN number_of_taxa ELSE 0 END) as fungi_fung, + SUM(CASE three_letter_abbrev WHEN 'MICR' THEN number_of_taxa ELSE 0 END) as fungi_micr, + SUM(CASE three_letter_abbrev WHEN 'BASI' THEN number_of_taxa ELSE 0 END) as fungi_basi, + SUM(CASE three_letter_abbrev WHEN 'ASCO' THEN number_of_taxa ELSE 0 END) as fungi_asco, + SUM(CASE three_letter_abbrev WHEN 'MUCO' THEN number_of_taxa ELSE 0 END) as fungi_muco, + SUM(CASE three_letter_abbrev WHEN 'CHYT' THEN number_of_taxa ELSE 0 END) as fungi_chyt, + SUM(CASE three_letter_abbrev WHEN 'OMET' THEN number_of_taxa ELSE 0 END) as metazoa_omet, + SUM(CASE three_letter_abbrev WHEN 'NEMA' THEN number_of_taxa ELSE 0 END) as metazoa_nema, + SUM(CASE three_letter_abbrev WHEN 'ARTH' THEN number_of_taxa ELSE 0 END) as metazoa_arth, + SUM(CASE three_letter_abbrev WHEN 'CHOR' THEN number_of_taxa ELSE 0 END) - + SUM(CASE three_letter_abbrev WHEN 'ACTI' THEN 
number_of_taxa ELSE 0 END) -
+              SUM(CASE three_letter_abbrev WHEN 'AVES' THEN number_of_taxa ELSE 0 END) -
+              SUM(CASE three_letter_abbrev WHEN 'MAMM' THEN number_of_taxa ELSE 0 END) -
+              SUM(CASE three_letter_abbrev WHEN 'TUNI' THEN number_of_taxa ELSE 0 END) as metazoa_chor,
+              SUM(CASE three_letter_abbrev WHEN 'ACTI' THEN number_of_taxa ELSE 0 END) as metazoa_acti,
+              SUM(CASE three_letter_abbrev WHEN 'AVES' THEN number_of_taxa ELSE 0 END) as metazoa_aves,
+              SUM(CASE three_letter_abbrev WHEN 'MAMM' THEN number_of_taxa ELSE 0 END) as metazoa_mamm,
+              SUM(CASE three_letter_abbrev WHEN 'TUNI' THEN number_of_taxa ELSE 0 END) as metazoa_tuni
+       FROM (SELECT og.group_id AS name,agt.three_letter_abbrev,agt.number_of_taxa::numeric
+             FROM ApiDB.OrthologGroupTaxon agt, apidb.orthologgroup og
+             WHERE agt.group_id = og.group_id
+               AND agt.three_letter_abbrev = UPPER(agt.three_letter_abbrev))
+       GROUP BY name) actual
+)
+
+  ;
+
diff --git a/Model/lib/psql/webtables/??/ProjectTaxon.psql b/Model/lib/psql/webtables/??/ProjectTaxon.psql
new file mode 100644
index 0000000000..dd3d4ee56b
--- /dev/null
+++ b/Model/lib/psql/webtables/??/ProjectTaxon.psql
@@ -0,0 +1,151 @@
+  -- ProjectTaxon: maps lower-cased taxon names (a first word that is itself a taxon name, else the full name) to a project.
+  -- The trailing literal rows hard-code extra taxon/project pairs. ORDER BY 2, 1 sorts by project_id, then taxon.
+    CREATE TABLE :ORG_ABBREVProjectTaxon AS
+    WITH
+      local_taxon -- a taxon found in this instance, either in dots.ExternalNaSequence or in apidb.Organism
+      AS ( SELECT distinct tn.name as taxon,
+                  substr((tn.name), 1, position(' ' IN tn.name||' ') - 1) as first_word,
+                  pi.name as project_id
+           FROM dots.ExternalNaSequence ens, sres.TaxonName tn, core.ProjectInfo pi
+           WHERE ens.taxon_id = tn.taxon_id
+             and ((tn.name not like 'Bodo %' and tn.name not like 'Drosophila %')
+                  OR tn.name_class = 'scientific name')
+             and ens.row_project_id = pi.project_id
+           -- get names from apidb.Organism.family_name_for_files
+           -- (may not be necessary)
+          UNION
+           SELECT family_name_for_files as taxon,
+                  substr((family_name_for_files), 1, position(' ' IN family_name_for_files||' ') - 1) as first_word,
+                  project_name as project_id
+           FROM apidb.Organism
+           WHERE family_name_for_files in (select name from sres.TaxonName)
+         ),
+      mononym -- a taxon name that's the first word of a local taxon
+      AS (SELECT distinct lower(lt.first_word) as taxon, lt.project_id
+          FROM local_taxon lt, sres.TaxonName tn
+          WHERE lt.first_word = tn.name
+          -- and tn.name_class = 'scientific name'
+         ),
+      full_name -- the full name of a local taxon whose first name is not a taxon
+      AS (SELECT distinct lower(lt.taxon) as taxon, lt.project_id
+          FROM local_taxon lt, sres.TaxonName tn
+          WHERE lt.taxon = tn.name
+            -- and tn.name_class = 'scientific name'
+            and lower(lt.first_word) not in (select taxon from mononym))
+    SELECT * FROM mononym
+    UNION
+    SELECT * FROM full_name
+    UNION
+    SELECT 'hypocrea', 'FungiDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/afsm11', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/afsm2', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/amopi', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/asl1', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/de11d', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/de4a', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/frs/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/gd-d1-1', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/gd-d1-2', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/gd-d1-3', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/gillnor1/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/gillnor2/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/gillrich3/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/lithon', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/net12afl/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/netc1/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/netc2/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/neth2t3/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/np251002/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/nrss/ii', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/pal2', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/pao27/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/rp', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/sed5a/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/sedc1/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/sedcb1/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/sedct1/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/sedmh1/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/sedst1/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/sm53', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/sm68', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/st4n', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/st8v/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/su03', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/su4', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/tg1162', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/tg1267', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/tun1/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/wt2708/i', 'TriTrypDB'
+    UNION
+    SELECT 'perkinsiella-like_sp._plo/wtuts/i', 'TriTrypDB'
+    UNION
+    SELECT 'plo_of_paramoeba_invadens_ags-2013', 'TriTrypDB'
+    UNION
+    SELECT 'soil_flagellate_and31', 'TriTrypDB'
+    UNION
+    SELECT 'kinetoplastid_flagellate_lfs2', 'TriTrypDB'
+    UNION
+    SELECT 'cryptaulaxoides-like_sp._tcs-2003', 'TriTrypDB'
+    ORDER BY 2, 1
+
+  ;
+
+
+  -- Strip single quotes from taxon names (only rows that contain one are touched).
+    update :ORG_ABBREVProjectTaxon
+    set taxon = replace(taxon, '''', '')
+    where taxon like '%''%'
+
+  ;
+
+  -- Uniqueness on taxon alone: this statement fails if any taxon maps to more than one project.
+  -- NOTE(review): the index name suggests that failure is the intended data-load check - confirm.
+    create unique index data_load_prjct_err on :ORG_ABBREVProjectTaxon (taxon)
+
+  ;
+
+
+  -- Composite index on (taxon, project_id). Its uniqueness is already implied by the unique index on taxon.
+    create unique index projtax_ix on :ORG_ABBREVProjectTaxon (taxon, project_id)
+
+  ;
+
diff --git 
a/Model/lib/psql/webtables/??/SequenceAttributes.psql b/Model/lib/psql/webtables/??/SequenceAttributes.psql
new file mode 100644
index 0000000000..4cba9b09fb
--- /dev/null
+++ b/Model/lib/psql/webtables/??/SequenceAttributes.psql
@@ -0,0 +1,131 @@
+  -- SequenceAttributes: proteins (dots.AASequence) that belong to an ortholog group, keyed by full_id (= source_id),
+  -- with organism and clade columns (taxon subquery) and a link back to the source site (urls subquery).
+  -- taxon_group is the name of the seed clade (Archaea, Bacteria, ...) that the organism's clade descends from.
+    create table SequenceAttributes as
+    SELECT
+      aas.source_id AS full_id,
+      aas.source_id,
+      aas.aa_sequence_id,
+      length(aas.sequence) as length,
+      aas.description AS product,
+      aas.taxon_id,
+
+      taxon.orthomcl_taxon_id,
+      taxon.taxon_group,
+      taxon.orthomcl_abbrev AS taxon_abbreviation,
+      taxon.name AS organism_name,
+      taxon.core_peripheral,
+
+      o.group_id AS group_name,
+      o.ortholog_group_id,
+      o.number_of_members AS group_size,
+      o.number_of_core_members,
+      o.number_of_peripheral_members,
+      CASE is_residual WHEN 1 THEN 'Residual'
+        ELSE 'Core' END AS group_type,
+      urls.source_url,
+      urls.source_text
+    FROM
+      dots.AASequence aas,
+      apidb.orthologGroup o,
+      apidb.orthologGroupAASequence ogseq,
+      (
+        SELECT o.orthomcl_abbrev,
+               o.taxon_id as orthomcl_taxon_id,
+               t.name,
+               t.core_peripheral,
+               t.taxon_group
+        FROM apidb.organism o,
+             (WITH RECURSIVE TaxonHierarchy AS (
+                SELECT
+                  three_letter_abbrev,
+                  orthomcl_clade_id,
+                  name,
+                  core_peripheral,
+                  name AS taxon_group,
+                  parent_id
+                FROM apidb.OrthomclClade
+                WHERE name IN ('Archaea', 'Bacteria', 'Alveolates', 'Amoebozoa', 'Euglenozoa',
+                               'Fungi', 'Metazoa', 'Other Eukaryota', 'Viridiplantae')
+                UNION ALL
+                SELECT
+                  child.three_letter_abbrev,
+                  child.orthomcl_clade_id,
+                  child.name,
+                  child.core_peripheral,
+                  parent.taxon_group,
+                  child.parent_id
+                FROM apidb.OrthomclClade child
+                JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id
+              )
+              SELECT three_letter_abbrev, taxon_group, name, core_peripheral
+              FROM TaxonHierarchy
+              WHERE core_peripheral IN ('C', 'P')
+             ) t
+        WHERE t.three_letter_abbrev = o.orthomcl_abbrev
+      ) taxon,
+  -- urls: keep the resource URL up to and including the slash before downloads/proteomes. SUBSTR starts at 1 because PostgreSQL counts position 0 against the length (Oracle treats 0 as 1).
+      (
+        SELECT aas.aa_sequence_id,
+               CASE
+                 WHEN ores.resource_name IN ('AmoebaDB','CryptoDB','FungiDB','GiardiaDB','HostDB','MicrosporidiaDB',
+                                             'PlasmoDB','PiroplasmaDB','ToxoDB','TrichDB','TriTrypDB','VectorBase')
+                 THEN SUBSTR(ores.resource_url, 1, strpos(ores.resource_url, '/downloads')) || 'record/gene/'
+                      || aas.source_id
+                 WHEN ores.resource_name = 'Uniprot'
+                 THEN SUBSTR(ores.resource_url, 1, strpos(ores.resource_url, '/proteomes') ) || 'uniprot/'
+                      || aas.source_id
+                 ELSE '' END AS source_url,
+               CASE WHEN ores.resource_name IS NULL THEN ''
+                 ELSE aas.source_id || ' (' || ores.resource_name || ')' END AS source_text
+        FROM dots.AaSequence aas,
+             apidb.organism ot,
+             apidb.orthomclresource ores
+        WHERE ot.taxon_id = ores.orthomcl_taxon_id
+          AND ot.taxon_id = aas.taxon_id) urls
+    WHERE aas.aa_sequence_id = ogseq.aa_sequence_id
+      AND ogseq.group_id = o.group_id
+      AND aas.aa_sequence_id = urls.aa_sequence_id
+      AND taxon.orthomcl_taxon_id = aas.taxon_id
+      AND aas.taxon_id in (select distinct(eas.taxon_id) from apidb.organism og, dots.aasequence eas where eas.taxon_id = og.taxon_id)
+
+  ;
+
+
+  -- full_id must be unique. This fails if a sequence matched more than one row in any joined subquery.
+    alter table SequenceAttributes
+      add constraint SeqAttrs_pk primary key (full_id)
+
+  ;
+
+
+
+    create unique index SeqAttrs_idx ON SequenceAttributes (full_id, group_name, taxon_id, source_id)
+
+  ;
+
+
+
+    create unique index SeqAttrs_gusIdx ON SequenceAttributes (ortholog_group_id, aa_sequence_id)
+
+  ;
+
+
+
+    create unique index SeqAttrs_idx2 ON SequenceAttributes (group_name, length desc, full_id, taxon_id)
+
+  ;
+
+
+
+    create unique index SeqAttrs_idx3
+      on SequenceAttributes (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id)
+
+  ;
+
+
+
+    create unique index SeqAttrs_idx4 ON SequenceAttributes (source_id, full_id, group_name, taxon_id)
+
+  ;
+
diff --git a/Model/lib/psql/webtables/??/SequenceEnzymeClass.psql b/Model/lib/psql/webtables/??/SequenceEnzymeClass.psql
new file mode 100644
index 0000000000..35fdff6793
--- /dev/null
+++ b/Model/lib/psql/webtables/??/SequenceEnzymeClass.psql
@@ -0,0 +1,36 @@
+
+
+
CREATE TABLE SequenceEnzymeClass AS (   -- EC-number annotations for each protein in SequenceAttributes
+      SELECT sa.full_id
+           , sa.group_name
+        -- , sec.uniprot_accession
+           , ec.ec_number
+           , ec.description
+           , ec.parent_id as ec_parent
+           , ec.ec_number_1
+           , ec.ec_number_2
+           , ec.ec_number_3
+           , ec.ec_number_4
+      FROM sequenceattributes sa
+         , dots.AASequence aa   -- joined on its key only, contributes no output column
+         , dots.AASequenceEnzymeClass sec
+         , sres.ENZYMECLASS ec
+      WHERE sa.aa_sequence_id = aa.aa_sequence_id
+        AND sec.aa_sequence_id = aa.aa_sequence_id
+        AND sec.enzyme_class_id = ec.enzyme_class_id
+    )
+
+  ;
+
+
+
+    CREATE INDEX SequenceEnzymeClass_idx1 ON SequenceEnzymeClass (group_name, ec_number, description)
+
+  ;
+
+
+
+    CREATE INDEX SequenceEnzymeClass_idx2 ON SequenceEnzymeClass (full_id, ec_number, description)
+
+  ;
+
diff --git a/Model/lib/psql/webtables/??/SequencePieceClosure.psql b/Model/lib/psql/webtables/??/SequencePieceClosure.psql
new file mode 100644
index 0000000000..3993db26d3
--- /dev/null
+++ b/Model/lib/psql/webtables/??/SequencePieceClosure.psql
@@ -0,0 +1,48 @@
+  -- SequencePieceClosure, level 1: dots.SequencePiece rows (edge_level = 1) whose piece sequence has the requested taxon.
+  -- NOTE(review): the TAXON_IDValue placeholder sits inside quotes, so it is presumably replaced textually before execution - confirm.
+    CREATE TABLE :ORG_ABBREVSequencePieceClosure AS
+    SELECT sp.sequence_piece_id,
+           sp.virtual_na_sequence_id,
+           sp.piece_na_sequence_id,
+           sp.sequence_order,
+           sp.distance_from_left,
+           sp.uncertainty,
+           sp.strand_orientation,
+           sp.start_position,
+           sp.end_position,
+           sp.modification_date,
+           1 AS edge_level
+    FROM dots.SequencePiece sp, dots.NaSequence ns
+    WHERE sp.piece_na_sequence_id = ns.na_sequence_id
+      AND (ns.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)   -- an empty value disables the taxon filter
+
+  ;
+
+
+
+    /* known issue: this should be run not just once, but iteratively
+       until it doesn't create new records. Currently (7/2008),
+       SequencePieces aren't nested even once.
*/
+    INSERT INTO :ORG_ABBREVSequencePieceClosure   -- adds edge_level 2 rows: pieces of pieces
+      (edge_level, virtual_na_sequence_id, piece_na_sequence_id,
+       distance_from_left, strand_orientation, modification_date,
+       start_position, end_position, sequence_order, sequence_piece_id)
+    SELECT 2, higher.virtual_na_sequence_id, lower.piece_na_sequence_id,
+           higher.distance_from_left,
+           case   -- '+' when both orientations agree (NULL counts as '+'), otherwise '-'
+             when coalesce(higher.strand_orientation, '+') = coalesce(lower.strand_orientation, '+')
+             then '+'
+             else '-'
+           end as strand_orientation,
+           now(),
+           higher.start_position - lower.distance_from_left,
+           higher.end_position - lower.distance_from_left,
+           higher.sequence_order,
+           nextval('dots.sequencepiece_sq')   -- fresh id drawn from the SequencePiece sequence
+    FROM :ORG_ABBREVSequencePieceClosure higher, :ORG_ABBREVSequencePieceClosure lower
+    WHERE higher.piece_na_sequence_id = lower.virtual_na_sequence_id
+      AND higher.start_position >= lower.start_position + lower.distance_from_left
+      AND higher.end_position <= lower.end_position + lower.distance_from_left
+
+  ;
+
diff --git a/Model/lib/psql/webtables/??/StudyIdDatasetId.psql b/Model/lib/psql/webtables/??/StudyIdDatasetId.psql
new file mode 100644
index 0000000000..c42d4df261
--- /dev/null
+++ b/Model/lib/psql/webtables/??/StudyIdDatasetId.psql
@@ -0,0 +1,24 @@
+  -- StudyIdDatasetId: EDA study stable_id mapped to DatasetPresenter id and short name, matched through the external database name.
+  -- NOTE(review): the CASE below handles 'DS_2b98dd44ab' but the IN list omits it, so that branch (and ELSE 'NA') can never fire - confirm which is intended.
+    CREATE TABLE StudyIdDatasetId AS
+    SELECT s.STABLE_ID STUDY_STABLE_ID, dp.DATASET_PRESENTER_ID DATASET_ID, dp.SHORT_DISPLAY_NAME AS DATASET_SHORT_DISPLAY_NAME
+    FROM EDA.STUDY s
+    LEFT JOIN sres.EXTERNALDATABASERELEASE e ON s.EXTERNAL_DATABASE_RELEASE_ID =e.EXTERNAL_DATABASE_RELEASE_ID
+    LEFT JOIN sres.EXTERNALDATABASE e2 ON e.EXTERNAL_DATABASE_ID =e2.EXTERNAL_DATABASE_ID
+    LEFT JOIN DatasetPresenter dp on e2.name=dp.name
+    -- This is TEMPORARY (used for alpha MapVEU Application)
+    UNION
+    select case
+             when d.dataset_presenter_id = 'DS_480c976ef9' then 'VBP_MEGA'
+             when d.dataset_presenter_id = 'DS_e18287e335' then '2023-maine-ricinus'
+             when d.dataset_presenter_id = 'DS_2b98dd44ab' then '2010-Neafsey-M-S-Bamako'
+             else 'NA' end as study_stable_id,
+           d.dataset_presenter_id as dataset_id, d.short_display_name as dataset_short_display_name
+    from DatasetPresenter d
+    where d.dataset_presenter_id in (
+      'DS_480c976ef9',
+      'DS_e18287e335'
+    )
+
+  ;
+
diff --git a/Model/lib/psql/webtables/??/TypeAheadCounts.psql b/Model/lib/psql/webtables/??/TypeAheadCounts.psql
new file mode 100644
index 0000000000..921efa9bd7
--- /dev/null
+++ b/Model/lib/psql/webtables/??/TypeAheadCounts.psql
@@ -0,0 +1,42 @@
+-- TypeAheadCounts: distinct-protein counts per option_id, for three kinds of id unioned together:
+-- DbRef primary identifiers, EC numbers, and ontology-term source_ids reached through GO associations.
+CREATE TABLE TypeAheadCounts AS
+  (SELECT DISTINCT dr.primary_identifier as option_id,
+          count(distinct aaf.aa_sequence_id) AS protein_count
+   FROM dots.DbRefAaFeature draf, sres.DbRef dr, dots.aafeature aaf,
+        sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+   WHERE draf.db_ref_id = dr.db_ref_id
+     AND dr.external_database_release_id
+         = edr.external_database_release_id
+     AND edr.external_database_id = ed.external_database_id
+     AND aaf.aa_feature_id = draf.aa_feature_id
+   GROUP BY dr.primary_identifier
+  )
+  UNION
+  (SELECT DISTINCT ec.ec_number AS option_id,
+          COUNT(DISTINCT asec.aa_sequence_id) AS protein_count
+   FROM sres.enzymeClass ec,
+        dots.aaSequenceEnzymeClass asec
+   WHERE asec.enzyme_class_id = ec.enzyme_class_id
+   GROUP BY ec.ec_number)
+  UNION
+  (SELECT DISTINCT gt.source_id AS option_id,
+          COUNT(DISTINCT aaf.aa_sequence_id) AS protein_count
+   FROM dots.AaFeature aaf,
+        dots.GoAssociation ga, sres.OntologyTerm gt,
+        dots.GoAssociationInstance gai,core.TableInfo ti
+   WHERE aaf.aa_sequence_id = ga.row_id
+     AND ga.table_id = ti.table_id
+     AND ti.name = 'ExternalAASequence'
+     AND ga.go_term_id = gt.ontology_term_id
+     AND ga.go_association_id = gai.go_association_id
+   GROUP BY gt.source_id)
+
+  ;
+
+
+
+CREATE INDEX TypeAheadCounts_idx ON TypeAheadCounts (option_id)
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes.psql b/Model/lib/psql/webtables/MG/CompoundAttributes.psql
new file mode 100644
index 0000000000..3f69995d58
--- /dev/null
+++ 
b/Model/lib/psql/webtables/MG/CompoundAttributes.psql
@@ -0,0 +1,28 @@
+  -- CompoundAttributes: one row per compound with no parent (parent_id IS NULL). The list columns and the mass
+  -- are aggregated over the compound itself plus its direct children in CompoundProperties (mass is averaged).
+    CREATE TABLE :ORG_ABBREVCompoundAttributes AS
+    SELECT p.ID
+         , p.source_id
+         , p.compound_name
+         , string_agg(childc.other_names, ';' ORDER BY childc.other_names) AS other_names
+         , string_agg(childc.iupac_name, ';' ORDER BY childc.iupac_name) AS iupac_name
+         , string_agg(childc.syn, ';' ORDER BY childc.syn) AS syn
+         , p.definition
+         , p.secondary_ids
+         , string_agg(childc.formula, ';' ORDER BY childc.formula) AS formula
+         , avg(childc.mass::numeric) AS mass
+    FROM CompoundProperties p
+       , (SELECT id, parent_id, other_names, iupac_name, syn, mass, formula FROM CompoundProperties ) childc
+    WHERE p.parent_id IS NULL
+      AND ( p.ID = childc.parent_id OR p.ID = childc.ID )
+    GROUP BY p.ID, p.source_id, p.compound_name, p.definition, p.secondary_ids
+
+  ;
+
+
+
+    CREATE INDEX :ORG_ABBREVCompoundAttributes_idx ON :ORG_ABBREVCompoundAttributes (source_id)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MG/CompoundId.psql b/Model/lib/psql/webtables/MG/CompoundId.psql
new file mode 100644
index 0000000000..64eafbd86a
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/CompoundId.psql
@@ -0,0 +1,39 @@
+  -- CompoundId: alias-to-compound lookup. Each UNION branch contributes one alias type:
+  -- same ID, child ID, KEGG accession (direct, and via child compounds in chebi.compounds), ChEBI name, ChEBI synonym.
+    CREATE TABLE :ORG_ABBREVCompoundId AS
+    SELECT source_id AS id, source_id AS compound, 'same ID' AS type, '' as source
+    FROM :ORG_ABBREVCompoundAttributes
+    UNION
+    SELECT p.source_id AS id, ca.source_id AS compound, 'child ID' AS type, '' as source
+    FROM :ORG_ABBREVCompoundAttributes ca, CompoundProperties p
+    WHERE ca.id = p.parent_id
+    UNION
+    SELECT da.accession_number AS id, p.source_id AS compound, 'KEGG' AS type, '' as source
+    FROM chebi.database_accession da, :ORG_ABBREVCompoundAttributes p
+    WHERE da.type='KEGG COMPOUND accession'
+      AND da.compound_id = p.id
+    UNION
+    SELECT distinct da.accession_number AS id, p.chebi_accession AS compound, 'KEGG' as type, '' as source
+    FROM chebi.database_accession da, chebi.compounds c, chebi.compounds p
+    WHERE NOT p.status in ('D', 'F') AND da.type='KEGG COMPOUND accession'
+      AND da.compound_id = c.id AND c.parent_id=p.id
+    UNION
+    SELECT n.name as id, ca.source_id as compound, 'name' as type, n.source
+    FROM :ORG_ABBREVCompoundAttributes ca, chebi.names n
+    WHERE ca.id = n.compound_id
+      AND n.type = 'NAME'
+    UNION
+    SELECT n.name as id, ca.source_id as compound, 'synonym' as type, n.source
+    FROM :ORG_ABBREVCompoundAttributes ca, chebi.names n
+    WHERE ca.id = n.compound_id
+      AND n.type = 'SYNONYM'
+
+  ;
+
+
+
+    CREATE INDEX :ORG_ABBREVCompoundId_idx ON :ORG_ABBREVCompoundId (id, compound)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MG/CompoundProperties.psql b/Model/lib/psql/webtables/MG/CompoundProperties.psql
new file mode 100644
index 0000000000..7bea00b811
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/CompoundProperties.psql
@@ -0,0 +1,29 @@
+  -- CompoundProperties: per ChEBI compound, its names, formula, mass and the accessions of its child compounds (secondary_ids). Statuses 'D' and 'F' are excluded.
+  -- NOTE(review): several one-to-many LEFT JOINs are combined before aggregation, so string_agg values may repeat - confirm that is acceptable.
+    CREATE TABLE CompoundProperties AS
+    SELECT c.ID, c.chebi_accession AS source_id, c.parent_id,
+           c.name AS compound_name,
+           substr(string_agg(cn.name, ';'), 1, 1000) AS other_names,
+           substr(string_agg(ciup.iupac_name, ';'), 1, 1000) AS iupac_name,
+           substr(string_agg(csyn.syn, ';'), 1, 1000) AS syn,
+           c.definition, m.mass,
+           string_agg(formu.formula, ';' order by formu.formula) AS formula,
+           string_agg(sec.chebi_accession, ';' order by sec.chebi_accession) AS secondary_ids
+    FROM chebi.compounds c
+    LEFT JOIN ( SELECT compound_id, NAME FROM chebi.names WHERE TYPE='NAME')
+              cn ON c.ID = cn.compound_id
+    LEFT JOIN ( SELECT compound_id, MIN(NAME) AS iupac_name FROM chebi.names WHERE TYPE='IUPAC NAME' GROUP BY compound_id)
+              ciup ON c.ID = ciup.compound_id
+    LEFT JOIN ( SELECT compound_id, MIN(NAME) AS syn FROM chebi.names WHERE type='SYNONYM' GROUP BY compound_id)
+              csyn ON c.ID = csyn.compound_id
+    LEFT JOIN ( SELECT compound_id, chemical_data AS formula FROM chebi.chemical_data WHERE TYPE='FORMULA')
+              formu ON c.ID = formu.compound_id
+    LEFT JOIN ( SELECT compound_id, chemical_data AS mass FROM chebi.chemical_data WHERE TYPE='MASS' and 
chemical_data != 'NaN')
+              m ON c.ID = m.compound_id
+    LEFT JOIN ( SELECT parent_id, chebi_accession FROM chebi.compounds)
+              sec ON c.ID = sec.parent_id
+    WHERE NOT c.status in ('D', 'F')
+    GROUP BY c.ID, c.chebi_accession, c.parent_id, c.name, c.definition, m.mass
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql b/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql
new file mode 100644
index 0000000000..244b989c5c
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql
@@ -0,0 +1,15 @@
+  -- CompoundTypeAheads: display strings for compounds that occur in PathwayCompounds,
+  -- one labelled with the ChEBI accession and one with the pathway's own compound_source_id.
+    CREATE TABLE CompoundTypeAheads AS
+    SELECT ca.source_id AS compound_id,
+           ca.source_id || ' (' || ca.compound_name || ')' AS display
+    FROM CompoundAttributes ca, PathwayCompounds pc   -- NOTE(review): CompoundAttributes is created with the ORG_ABBREV prefix elsewhere in this patch - confirm the bare name resolves
+    WHERE pc.chebi_accession = ca.source_id
+    UNION
+    SELECT ca.source_id AS compound_id,
+           pc.compound_source_id || ' (' || ca.compound_name || ')' AS display
+    FROM CompoundAttributes ca, PathwayCompounds pc
+    WHERE pc.chebi_accession = ca.source_id
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
new file mode 100644
index 0000000000..766215dcc9
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql
@@ -0,0 +1,32 @@
+-- GroupDomainAttribute: per ortholog group, the domain accessions ranked in the top 3 by distinct protein count,
+-- rendered as 'accession (count), ...'. rank() keeps ties, so more than three accessions can be listed.
+CREATE TABLE GroupDomainAttribute AS
+(
+SELECT og.group_id AS group_name, ag.descriptions
+FROM apidb.OrthologGroup og,
+     (SELECT group_name,
+             STRING_AGG(accession ||' (' || num_proteins|| ')', ', ') AS descriptions
+      FROM (SELECT group_name, accession, num_proteins, rnk
+            FROM (SELECT group_name, accession, num_proteins,
+                         rank() OVER (PARTITION BY group_name ORDER BY num_proteins DESC) rnk
+                  FROM (SELECT group_name, accession, count(distinct full_id) AS num_proteins
+                        FROM DomainAssignment
+                        GROUP BY group_name,accession
+                       ) domain_counts   -- FROM-subquery aliases are mandatory before PostgreSQL 16
+                 ) ranked_domains
+            WHERE rnk <= 3
+           ) top_domains
+      GROUP BY group_name
+      ORDER BY 1
+     ) ag
+WHERE og.group_id = ag.group_name
+)
+
+  ;
+
+
+
+CREATE INDEX GroupDomainAttribute_idx ON GroupDomainAttribute (group_name)
+
+  ;
+
diff --git 
a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webtables/MG/OntologyLevels.psql
new file mode 100644
index 0000000000..a117a4a74d
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/OntologyLevels.psql
@@ -0,0 +1,48 @@
+  -- Staging table: every (subject, object) term pair linked by the 'is_a' predicate.
+  -- The table name is spelled Is_a_links and is_a_links below. Unquoted identifiers are case-insensitive in PostgreSQL.
+    CREATE UNLOGGED TABLE :ORG_ABBREVIs_a_links AS
+    SELECT subject_term_id, object_term_id
+    FROM sres.OntologyRelationship rel, sres.OntologyTerm pred
+    WHERE rel.predicate_term_id = pred.ontology_term_id
+      AND pred.name = 'is_a'
+
+  ;
+
+
+  -- Staging table: object terms that never appear as a subject of an is_a link.
+    CREATE UNLOGGED TABLE :ORG_ABBREVRoots AS
+    SELECT object_term_id FROM :ORG_ABBREVis_a_links
+    EXCEPT
+    SELECT subject_term_id FROM :ORG_ABBREVis_a_links
+
+  ;
+
+  -- OntologyLevels: min and max depth per term, walking is_a links from the roots (object to subject).
+  -- Roots are seeded at depth 1 but reported at depth 0, so their direct children get depth 2.
+  -- UNION (not UNION ALL) drops repeated (term, depth) pairs during the walk.
+    CREATE TABLE :ORG_ABBREVOntologyLevels as
+    WITH RECURSIVE levels(ontology_term_id, depth) AS (
+      SELECT object_term_id, 1 as depth FROM :ORG_ABBREVRoots
+      UNION
+      SELECT :ORG_ABBREVis_a_links.subject_term_id, levels.depth + 1 as depth
+      FROM :ORG_ABBREVIs_a_links, levels
+      WHERE :ORG_ABBREVis_a_links.object_term_id = levels.ontology_term_id
+    )
+    SELECT ontology_term_id, min(depth) as min_depth, max(depth) as max_depth
+    FROM (
+      SELECT ontology_term_id, depth
+      FROM levels
+      WHERE ontology_term_id NOT IN (SELECT object_term_id FROM :ORG_ABBREVRoots)
+      UNION
+      SELECT object_term_id, 0 FROM :ORG_ABBREVRoots
+    ) t
+    GROUP BY ontology_term_id
+
+  ;
+
+
+
+    create index olev_termix on :ORG_ABBREVOntologyLevels (ontology_term_id, min_depth, max_depth)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes.psql b/Model/lib/psql/webtables/MG/PathwayAttributes.psql
new file mode 100644
index 0000000000..445fd4bffb
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/PathwayAttributes.psql
@@ -0,0 +1,68 @@
+  -- PathwayAttributes: sres.pathway rows with their enzyme-node and compound-node counts and source database.
+  -- The cpd and enz subqueries are inner-joined, so a pathway lacking either node type is left out.
+    CREATE TABLE PathwayAttributes as
+    SELECT
+      p.source_id
+      , p.pathway_id
+      , p.name
+      , enz.total_enzyme_count
+      , cpd.total_compound_count
+      , p.url
+      , replace(replace(ed.name, 'Pathways_', ''), '_RSRC', '') as pathway_source
+      , ed.name as external_db_name
+      , edr.version as external_db_version
+    FROM
+
sres.pathway p
+      , sres.externalDatabase ed
+      , sres.externalDatabaseRelease edr
+      ,(SELECT
+          COUNT( *) AS total_compound_count
+          , pathway_id
+        FROM
+          sres.pathwayNode pn
+          , SRES.ontologyterm ot
+        WHERE
+          pn.pathway_node_type_id = ot.ontology_term_id
+          AND ot.name = 'molecular entity'
+        GROUP BY
+          pathway_id
+       ) cpd
+      ,(SELECT
+          COUNT( *) AS total_enzyme_count
+          , pathway_id
+        FROM
+          sres.pathwayNode pn
+          , SRES.ontologyterm ot
+        WHERE
+          pn.pathway_node_type_id = ot.ontology_term_id
+          AND ot.name = 'enzyme'
+        GROUP BY
+          pathway_id
+       ) enz
+    WHERE
+      ed.external_database_id = edr.external_database_id
+      AND edr.external_database_release_id = p.external_database_release_id
+      AND cpd.pathway_id = p.pathway_id
+      AND enz.pathway_id = p.pathway_id
+      AND source_id NOT IN('ec01100', 'ec01110', 'ec01120')
+      -- temporarily remove MPMP from release 46
+      AND ed.name NOT LIKE '%MPMP%'
+
+  ;
+
+  -- One row per (source_id, pathway_source). The index must name the table exactly as the CREATE TABLE does,
+  -- i.e. without the ORG_ABBREV prefix, which this script never applies to PathwayAttributes.
+    CREATE UNIQUE INDEX PathAttr_sourceId_pwaySrc
+      ON PathwayAttributes (source_id, pathway_source)
+
+
+  ;
+
+
+
+    create index PathAttr_ix
+      on PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds.psql b/Model/lib/psql/webtables/MG/PathwayCompounds.psql
new file mode 100644
index 0000000000..1ca74c7a6c
--- /dev/null
+++ b/Model/lib/psql/webtables/MG/PathwayCompounds.psql
@@ -0,0 +1,91 @@
+  -- PathwayCompounds: compound ('molecular entity') nodes taking part in pathway reactions, tagged substrate or product,
+  -- with the ChEBI accession and URL when the node's row_id matches a chebi.compounds id.
+    CREATE TABLE PathwayCompounds AS
+    SELECT
+      pathway_id
+      , reaction_id
+      , ext_db_name
+      , ext_db_version
+      , compound_node_id
+      , compound_source_id
+      , c.chebi_accession
+      , case when c.chebi_accession is not null then 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=' || c.chebi_accession else null end as chebi_url
+      , type
+    FROM (
+      SELECT
+        p.PATHWAY_ID
+        , prx.PATHWAY_REACTION_ID as reaction_id
+        , ed.NAME as ext_db_name
+        , edr.version as ext_db_version
+        , pn.pathway_node_id as compound_node_id
+        , pn.DISPLAY_LABEL as compound_source_id
+        , 'substrate' as 
type + , pn.row_id + FROM + APIDB.PATHWAYREACTION prx + , SRES.PATHWAY p + , APIDB.PATHWAYREACTIONREL prr + , SRES.PATHWAYNODE pn + , SRES.PATHWAYRELATIONSHIP prel + , SRES.ONTOLOGYTERM ot + , SRES.EXTERNALDATABASE ed + , SRES.EXTERNALDATABASERELEASE edr + WHERE p.PATHWAY_ID = prr.PATHWAY_ID + AND prx.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID + AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID + AND prel.NODE_ID = pn.PATHWAY_NODE_ID + AND ot.NAME = 'molecular entity' + AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID + AND p.EXTERNAL_DATABASE_RELEASE_ID = edr.EXTERNAL_DATABASE_RELEASE_ID + AND edr.EXTERNAL_DATABASE_ID = ed.EXTERNAL_DATABASE_ID + ) t LEFT OUTER JOIN CHEBI.COMPOUNDS c on t.row_id = c.ID + /* Second branch repeats the join but matches the compound node through prel.ASSOCIATED_NODE_ID (the first branch uses prel.NODE_ID) and labels the row product. UNION rather than UNION ALL, so identical rows are collapsed. */ UNION + SELECT + pathway_id + , reaction_id + , ext_db_name + , ext_db_version + , compound_node_id + , compound_source_id + , c.chebi_accession + , case when c.chebi_accession is not null then 'https://www.ebi.ac.uk/chebi/searchId.do?chebiId=' || c.chebi_accession else null end as chebi_url + , type + FROM ( + SELECT + p.PATHWAY_ID + , prx.PATHWAY_REACTION_ID as reaction_id + , ed.NAME as ext_db_name + , edr.version as ext_db_version + , pn.pathway_node_id as compound_node_id + , pn.DISPLAY_LABEL as compound_source_id + , 'product' as type + , pn.row_id + FROM + APIDB.PATHWAYREACTION prx + , SRES.PATHWAY p + , APIDB.PATHWAYREACTIONREL prr + , SRES.PATHWAYNODE pn + , SRES.PATHWAYRELATIONSHIP prel + , SRES.ONTOLOGYTERM ot + , SRES.EXTERNALDATABASE ed + , SRES.EXTERNALDATABASERELEASE edr + WHERE p.PATHWAY_ID = prr.PATHWAY_ID + AND prx.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID + AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID + AND prel.ASSOCIATED_NODE_ID = pn.PATHWAY_NODE_ID + AND ot.NAME = 'molecular entity' + AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID + AND p.EXTERNAL_DATABASE_RELEASE_ID = edr.EXTERNAL_DATABASE_RELEASE_ID + AND edr.EXTERNAL_DATABASE_ID = ed.EXTERNAL_DATABASE_ID + ) t2 LEFT OUTER JOIN CHEBI.COMPOUNDS c on
t2.row_id = c.ID + + ; + + + + create index PthCmpd_id_ix + on PathwayCompounds (pathway_id, reaction_id, ext_db_name) + + + ; + + diff --git a/Model/lib/psql/webtables/MG/PathwayNodes.psql b/Model/lib/psql/webtables/MG/PathwayNodes.psql new file mode 100644 index 0000000000..82ca920c30 --- /dev/null +++ b/Model/lib/psql/webtables/MG/PathwayNodes.psql @@ -0,0 +1,306 @@ + + + /* One UNION ALL branch per node type name. metabolic process: label, name and url come from PathwayAttributes when the node display_label equals a pathway source_id. enzyme: name and identifier come from sres.enzymeclass, and gene_count counts matching PathwayNodeGene rows. molecular entity: name and identifier come from chebi.compounds. */ CREATE UNLOGGED TABLE NodesWithTypes AS + SELECT pn.pathway_id + , CASE WHEN pa.name IS NOT NULL THEN pa.name ELSE pn.display_label END AS display_label + , pa.url + , CASE WHEN pa.name IS NOT NULL THEN pa.name ELSE pn.display_label END AS name + , pa.source_id AS node_identifier + , pn.pathway_node_id AS pathway_node_id + , pn.x + , pn.y + , pn.width + , pn.height + , pn.cellular_location + , ot.name AS type + , NULL AS gene_count + , NULL AS default_structure + FROM sres.pathwaynode pn + INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id + LEFT JOIN PathwayAttributes pa ON pn.display_label = pa.source_id + WHERE ot.name = 'metabolic process' + UNION ALL + SELECT pn.pathway_id + , pn.display_label + , NULL AS url + , coalesce(ec.description, pn.display_label) AS name + , ec.ec_number AS node_identifier + , pn.pathway_node_id AS pathway_node_id + , pn.x + , pn.y + , pn.width + , pn.height + , pn.cellular_location + , ot.name AS type + , count (tp.gene_source_id) as gene_count + , NULL AS default_structure + FROM sres.pathwaynode pn + INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id + LEFT JOIN sres.enzymeclass ec ON pn.row_id = ec.enzyme_class_id + LEFT JOIN PathwayNodeGene tp ON pn.pathway_node_id = tp.pathway_node_id + WHERE ot.name = 'enzyme' + GROUP BY pn.pathway_id + , pn.display_label + , ec.description + , ec.ec_number + , pn.pathway_node_id + , pn.x + , pn.y + , pn.width + , pn.height + , pn.cellular_location + , ot.name + UNION ALL + SELECT pn.pathway_id + , pn.display_label + , NULL AS url + , coalesce(c.name,
pn.display_label) AS name + , c.chebi_accession AS node_identifier + , pn.pathway_node_id AS pathway_node_id + , pn.x + , pn.y + , pn.width + , pn.height + , pn.cellular_location + , ot.name AS type + , NULL AS gene_count + , st.default_structure + FROM sres.pathwaynode pn + INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id + LEFT JOIN chebi.compounds c ON pn.row_id = c.id + LEFT JOIN ( + SELECT n.compound + , s.structure AS default_structure + FROM chebi.structures s + , (SELECT id + , compound + FROM CompoundId + WHERE type IN ('same ID', 'child ID') + ) n + WHERE n.id = 'CHEBI:' || s.compound_id + AND s.type = 'mol' + AND s.dimension = '2D' + AND s.default_structure = 'Y' + ) st ON c.chebi_accession = st.compound + WHERE ot.name = 'molecular entity' + + ; + + + + /* Each pathway relationship id paired with the is_reversible flag and reaction_source_id of its reaction as stored in PathwayReactions. */ CREATE UNLOGGED TABLE ReactionsWithReversibility AS + SELECT DISTINCT spr.pathway_relationship_id + , tpr.is_reversible + , tpr.reaction_source_id + FROM sres.pathwayrelationship spr + , apidb.pathwayreactionrel prr + , PathwayReactions tpr + WHERE prr.pathway_relationship_id = spr.pathway_relationship_id + AND tpr.reaction_id = prr.pathway_reaction_id + + ; + + + + /* For every enzyme node, pairs a relationship in which the enzyme is node_id (alias i) with one in which it is associated_node_id (alias o). Only pairs with the same reaction_source_id and equal reversibility are kept. io is a text key built from both neighbour node ids and both flags. */ CREATE UNLOGGED TABLE EnzymeEdges AS + SELECT DISTINCT nwt.pathway_id AS pathway_id + , nwt.pathway_node_id AS e_id + , nwt.type + , i.associated_node_id AS m1_id + , rri.is_reversible AS ir1 + , o.node_id AS m2_id + , rro.is_reversible AS ir2 + , i.associated_node_id || '_' || o.node_id || '_' || rri.is_reversible || '_' || rro.is_reversible as io + FROM sres.pathwayrelationship i + , sres.pathwayrelationship o + , NodesWithTypes nwt + , ReactionsWithReversibility rri + , ReactionsWithReversibility rro + WHERE i.node_id = nwt.pathway_node_id + AND o.associated_node_id = nwt.pathway_node_id + AND i.pathway_relationship_id = rri.pathway_relationship_id + AND o.pathway_relationship_id = rro.pathway_relationship_id + AND nwt.type = 'enzyme' + AND rri.is_reversible = rro.is_reversible + AND rri.reaction_source_id =
rro.reaction_source_id + + ; + + + + /* Groups enzyme nodes of one pathway that have an identical ordered edge list (all_edges). Groups with more than one member get a parent id made of the member node ids joined with underscores. NOTE(review): the final join back to AllEnzymeEdges matches on all_edges only, not pathway_id - confirm that an edge list cannot repeat across pathways. */ CREATE UNLOGGED TABLE ParentNodes AS + WITH AllEnzymeEdges AS ( + SELECT string_agg(io, ',' ORDER BY io) AS all_edges + , e_id + , pathway_id + FROM EnzymeEdges + GROUP BY pathway_id + , e_id + ) + , pn as ( + SELECT pathway_id + , all_edges + , string_agg(e_id::varchar, '_' ORDER BY e_id) AS parent + FROM AllEnzymeEdges + GROUP BY pathway_id + , all_edges + HAVING COUNT (*) > 1 + ) + SELECT aee.e_id, pn.* + FROM pn + , AllEnzymeEdges aee + WHERE aee.all_edges = pn.all_edges + + ; + + + + CREATE UNLOGGED TABLE NodesWithParents AS + SELECT DISTINCT ee.e_id AS pathway_node_id + , pn.parent + , ee.type AS node_type + , ee.pathway_id + FROM EnzymeEdges ee + , ParentNodes pn + WHERE pn.pathway_id = ee.pathway_id + AND ee.e_id = pn.e_id + + ; + + + + CREATE UNLOGGED TABLE EnzymeReactions AS + SELECT DISTINCT pn.PATHWAY_NODE_ID node_id + , pr.SOURCE_ID AS reaction_source_id + FROM sres.pathwaynode pn + , apidb.pathwayreaction pr + , APIDB.PATHWAYREACTIONREL prr + , SRES.PATHWAYRELATIONSHIP prel + , sres.ontologyterm ot + WHERE (prel.NODE_ID = pn.PATHWAY_NODE_ID OR prel.ASSOCIATED_NODE_ID = pn.PATHWAY_NODE_ID) + AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID + AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID + AND ot.name = 'enzyme' + AND pn.PATHWAY_NODE_TYPE_ID = ot.ONTOLOGY_TERM_ID + + ; + + + + CREATE UNLOGGED TABLE ParentsForEdges AS + SELECT ee.e_id + , ee.m1_id + , ee.ir1 + , ee.m2_id + , ee.ir2 + , np.parent + FROM EnzymeEdges ee + , NodesWithParents np + WHERE ee.e_id = np.pathway_node_id + + ; + + + + /* Edge list: enzyme (or its parent group) to m1, m2 to enzyme (or its parent group), plus relationships whose two end nodes are both non-enzyme. DISTINCT is redundant next to the GROUP BY in the first two branches. */ CREATE TABLE PathwayEdges AS + SELECT pa.source_id + , pa.pathway_source + , rel.* + FROM ( + SELECT DISTINCT ee.pathway_id + , coalesce(pe.parent, ee.e_id::varchar) AS source + , ee.m1_id::varchar AS target + , max(ee.ir1) AS is_reversible + FROM EnzymeEdges ee + LEFT JOIN ParentsForEdges pe ON ee.e_id = pe.e_id + GROUP BY ee.pathway_id, ee.m1_id, coalesce(pe.parent, ee.e_id::varchar) + UNION + SELECT DISTINCT
ee.pathway_id + , ee.m2_id::varchar AS source + , coalesce(pe.parent, ee.e_id::varchar) AS target + , max(ee.ir2) AS is_reversible + FROM EnzymeEdges ee + LEFT JOIN ParentsForEdges pe ON ee.e_id = pe.e_id + GROUP BY ee.pathway_id, ee.m2_id, coalesce(pe.parent, ee.e_id::varchar) + UNION + SELECT pn1.pathway_id + , pr.node_id::varchar AS source + , pr.associated_node_id::varchar AS target + , pr.is_reversible + FROM sres.pathwayrelationship pr + , sres.pathwaynode pn1 + , sres.pathwaynode pn2 + , sres.ontologyterm ot1 + , sres.ontologyterm ot2 + WHERE pr.node_id = pn1.pathway_node_id + AND pr.associated_node_id = pn2.pathway_node_id + AND pn1.pathway_node_type_id = ot1.ontology_term_id + AND pn2.pathway_node_type_id = ot2.ontology_term_id + AND ot1.name != 'enzyme' + AND ot2.name != 'enzyme' + ) rel + , PathwayAttributes pa + WHERE pa.pathway_id = rel.pathway_id + + ; + + + + /* Node rows for each pathway. url is taken from NodesWithTypes when present; for enzyme nodes it otherwise falls back to a per-source reaction link built from reaction_source_id. The LeishCyc link uses the type=REACTION query parameter, the same as the MetaCyc and TrypanoCyc links. */ CREATE TABLE PathwayNodes AS + SELECT pa.source_id + , pa.pathway_source + , pn.display_label + , pn.x + , pn.y + , pn.width + , pn.height + , pn.cellular_location + , coalesce(pn.url, + CASE WHEN coalesce(type, nodes_with_parents.node_type) = 'enzyme' THEN + CASE + -- CHECK AND FIX + --WHEN REGEXP_LIKE (display_label, '^\d+\.(\d|-)+\.(\d|-)+\.(\d|-)+$') THEN 'https://enzyme.expasy.org/EC/' || display_label + WHEN pa.pathway_source = 'KEGG' THEN 'https://www.genome.jp/dbget-bin/www_bget?rn:' || reaction_source_id + WHEN pa.pathway_source = 'MetaCyc' THEN 'https://metacyc.org/META/new-image?type=REACTION' || chr(38) || 'object=' || reaction_source_id + WHEN pa.pathway_source = 'TrypanoCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/TRYPANO/new-image?type=REACTION' || chr(38) || 'object=' || reaction_source_id + WHEN pa.pathway_source = 'LeishCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/LEISH/new-image?type=REACTION' || chr(38) || 'object=' || reaction_source_id + ELSE NULL END + ELSE NULL END) AS url + , pn.name + , pn.node_identifier + , nodes_with_parents.pathway_node_id AS id + ,
nodes_with_parents.parent + , reaction_source_id + , coalesce(type, nodes_with_parents.node_type) AS node_type + , pn.gene_count + , pn.default_structure + FROM + ( SELECT e_id::varchar AS pathway_node_id + , nwp.parent + , type AS node_type + , ee.pathway_id + , er.reaction_source_id + FROM EnzymeEdges ee + INNER JOIN EnzymeReactions er ON er.node_id = ee.e_id + LEFT JOIN NodesWithParents nwp ON ee.e_id = nwp.pathway_node_id + UNION + SELECT nwp.parent + , NULL + , 'nodeOfNodes' + , pathway_id + , NULL + FROM NodesWithParents nwp + UNION + SELECT nwt.pathway_node_id::varchar AS pathway_node_id + , NULL AS parent + , nwt.type + , nwt.pathway_id + , NULL + FROM NodesWithTypes nwt + WHERE nwt.type != 'enzyme' + ) nodes_with_parents + INNER JOIN PathwayAttributes pa ON nodes_with_parents.pathway_id = pa.pathway_id + LEFT JOIN NodesWithTypes pn ON nodes_with_parents.pathway_node_id = pn.pathway_node_id::varchar + + ; + + diff --git a/Model/lib/psql/webtables/MG/PathwayReactions.psql b/Model/lib/psql/webtables/MG/PathwayReactions.psql new file mode 100644 index 0000000000..29f1ecef7c --- /dev/null +++ b/Model/lib/psql/webtables/MG/PathwayReactions.psql @@ -0,0 +1,141 @@ + + + /* One row per reaction and enzyme with its equation text and links. expasy_url: for enzymes shaped like an EC number, an ExPASy search link built from the EC sub-fields. A NULL sub-field contributes an empty string, so it is left out of the query string; a simple CASE x WHEN null never matches, and concatenating NULL would null the whole URL. NOTE(review): a NULL ec_number_1 (no sres.enzymeclass match) still yields a NULL expasy_url. */ CREATE TABLE PathwayReactions AS + SELECT o.* + , CASE WHEN o.expasy_url IS NOT NULL THEN '' || o.enzyme || '' ELSE o.enzyme END as expasy_html + FROM ( + SELECT i.* + , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-' + THEN + 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1=' + || ec.ec_number_1 + || CASE WHEN ec.ec_number_2 IS NULL THEN '' ELSE chr(38) || 'field2=' || ec.ec_number_2 END + || CASE WHEN ec.ec_number_3 IS NULL THEN '' ELSE chr(38) || 'field3=' || ec.ec_number_3 END + || CASE WHEN ec.ec_number_4 IS NULL THEN '' ELSE chr(38) || 'field4=' || ec.ec_number_4 END + ELSE reaction_url END as expasy_url + , ec.description as enzyme_description + FROM ( + SELECT + reaction_id + , reaction_source_id + , reaction_url + , ext_db_name + , ext_db_version + , enzyme + ,
substrates_html || ' ' || sign || ' ' || products_html as equation_html + , substrates_text || ' ' || sign || ' ' || products_text as equation_text + , case when sign = '<=>' then 1 else 0 end as is_reversible + , substrates_text + , products_text + FROM ( + SELECT + reaction_id + , reaction_source_id + , reaction_url + , ext_db_name + , ext_db_version + , enzyme + , /* sign is the two-way arrow when the ordered list of compounds seen as substrate equals the ordered list seen as product, or when is_reversible = 1; otherwise the one-way arrow. */ (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like '%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign + , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html + , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text + , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html + , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text + FROM ( + /* rep: one row per reaction, enzyme, compound and original flag. is_reversible is the last value of prel.is_reversible in ascending order over the whole reaction; type is the pc.type of the row with the lowest pathway_id in its partition. NOTE(review): ascending order puts NULL last by default, so one NULL flag would make the reaction-wide value NULL - confirm the column is never NULL. */ WITH rep AS ( + SELECT DISTINCT + pr.PATHWAY_REACTION_ID as reaction_id + , pr.SOURCE_ID as reaction_source_id + , pn.DISPLAY_LABEL as enzyme + , coalesce(ca.compound_name, pc.compound_source_id) as compound + , prel.is_reversible as is_reversible_og + , last_value(prel.is_reversible) OVER (partition by pr.pathway_reaction_id ORDER BY prel.is_reversible ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) as is_reversible + , first_value(pc.type) over (partition by pr.pathway_reaction_id, pr.SOURCE_ID, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE, coalesce(pc.chebi_accession, pc.compound_source_id), coalesce(ca.compound_name, pc.compound_source_id) ORDER BY pc.pathway_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as type + FROM + sres.pathway p + , apidb.pathwayreaction pr + , APIDB.PATHWAYREACTIONREL prr + , SRES.PATHWAYNODE pn + , SRES.PATHWAYRELATIONSHIP prel + ,
SRES.ONTOLOGYTERM ot + , PathwayCompounds pc + LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id + WHERE p.PATHWAY_ID = prr.PATHWAY_ID + AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID + AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID + AND prel.NODE_ID = pn.PATHWAY_NODE_ID + AND ot.name = 'enzyme' + AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID + AND pc.PATHWAY_ID = p.PATHWAY_ID + AND pc.REACTION_id = pr.PATHWAY_REACTION_ID + ) + /* One row per reaction, enzyme and compound. reaction_url is a per-source link chosen from the external database name with its Pathways_ prefix and _RSRC suffix removed; the LeishCyc link uses the type=REACTION query parameter, the same as the MetaCyc and TrypanoCyc links. */ SELECT DISTINCT + pr.PATHWAY_REACTION_ID as reaction_id + , pr.SOURCE_ID as reaction_source_id + , ed.NAME as ext_db_name + , edr.VERSION as ext_db_version + , cast(pn.DISPLAY_LABEL as varchar(20)) as enzyme + , min(rep.is_reversible) as is_reversible + , min(rep.type) as type + , string_agg (pc.type, ',' order by p.pathway_id) as type_list + , coalesce(ca.compound_name, pc.compound_source_id) as compound + , CASE + WHEN coalesce(pc.CHEBI_ACCESSION, pc.compound_source_id) LIKE 'CHEBI%' + THEN '' || coalesce(ca.compound_name, pc.compound_source_id) || '' + ELSE coalesce(pc.chebi_accession, pc.compound_source_id) + END as compound_url + , CASE (replace (replace (ed.name, 'Pathways_', ''), '_RSRC', '')) + WHEN 'KEGG' THEN 'https://www.genome.jp/dbget-bin/www_bget?rn:' || pr.source_id + WHEN 'MetaCyc' THEN 'https://metacyc.org/META/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id + WHEN 'TrypanoCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/TRYPANO/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id + WHEN 'LeishCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/LEISH/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id + WHEN 'FungiCyc' THEN NULL + END as reaction_url + FROM + sres.pathway p + , apidb.pathwayreaction pr + , APIDB.PATHWAYREACTIONREL prr + , SRES.PATHWAYNODE pn + , SRES.PATHWAYRELATIONSHIP prel + , SRES.EXTERNALDATABASE ed + , SRES.EXTERNALDATABASERELEASE edr + , SRES.ONTOLOGYTERM ot + , rep + , PathwayCompounds pc + LEFT JOIN
CompoundAttributes ca ON pc.chebi_accession = ca.source_id + WHERE p.PATHWAY_ID = prr.PATHWAY_ID + AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID + AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID + AND prel.NODE_ID = pn.PATHWAY_NODE_ID + AND ot.name = 'enzyme' + AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID + AND pc.EXT_DB_NAME = ed.NAME + AND pc.EXT_DB_VERSION = edr.VERSION + AND ed.EXTERNAL_DATABASE_ID = edr.EXTERNAL_DATABASE_ID + AND pc.PATHWAY_ID = p.PATHWAY_ID + AND pc.REACTION_id = pr.PATHWAY_REACTION_ID + AND rep.reaction_id = pr.pathway_reaction_id + AND rep.reaction_source_id = pr.source_id + AND rep.compound = coalesce(ca.compound_name, pc.compound_source_id) + AND rep.enzyme = pn.DISPLAY_LABEL + AND rep.is_reversible_og = prel.is_reversible + GROUP BY pr.pathway_reaction_id, pr.SOURCE_ID, ed.NAME, edr.VERSION, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE + , coalesce(pc.chebi_accession, pc.compound_source_id) + , coalesce(ca.compound_name, pc.compound_source_id) + ) t1 + GROUP BY reaction_id, reaction_source_id, reaction_url, ext_db_name, ext_db_version, enzyme, is_reversible + ) t2 + ) i + LEFT OUTER JOIN sres.enzymeclass ec ON i.enzyme = ec.ec_number + ) o + + ; + + + + create index PathRcts_id_ix + on PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name) + + + ; + + diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql new file mode 100644 index 0000000000..fe64a5fecf --- /dev/null +++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql @@ -0,0 +1,44 @@ +:CREATE_AND_POPULATE + + + /* distance is the rounded absolute gap between the segment midpoint and the transcript start_min (is_reversed = 0) or end_max (otherwise). NOTE(review): (segment_end - segment_start) / 2 truncates if both columns are integer typed - confirm. */ CREATE TABLE ChIPchipTranscript AS + SELECT DISTINCT ta.source_id, ta.gene_source_id, ta.project_id, sr.protocol_app_node_id, + CASE + WHEN ta.is_reversed = 0 + THEN round(abs(ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)),0) + ELSE round(abs(ta.end_max - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)),0) + END as
distance, + /* direction: when the transcript anchor minus the segment midpoint is positive, forward transcripts get - and reversed ones +; otherwise transcripts with is_reversed = 1 get - and all others +. */ CASE + WHEN /* distance > 0 */ + CASE WHEN ta.is_reversed = 0 + THEN ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) + ELSE ta.end_max - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) + END > 0 + THEN + CASE + WHEN ta.is_reversed = 0 + THEN '-' + ELSE '+' + END + ELSE + CASE + WHEN ta.is_reversed = 1 + THEN '-' + ELSE '+' + END + END as direction, + sr.score1 as score + FROM TranscriptAttributes ta, + Results.segmentresult sr, + Study.StudyLink sl, + Study.Study s + WHERE sr.na_sequence_id = ta.na_sequence_id + AND s.study_id = sl.study_id + AND sl.protocol_app_node_id = sr.protocol_app_node_id + AND lower(s.name) like '%chip%peaks' + AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.start_min) <= 3000) + or (ta.is_reversed = 1 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.end_max) <= 3000) ) + + +:DECLARE_PARTITION; + + diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql new file mode 100644 index 0000000000..44e43c5b10 --- /dev/null +++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql @@ -0,0 +1,7 @@ + + + create index chpgene_geneid_idx ON ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id) + + + ; + + diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql new file mode 100644 index 0000000000..d22fda9dd9 --- /dev/null +++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql @@ -0,0 +1,19 @@ +:CREATE_AND_POPULATE + + + /* Distinct sequence, chromosome and copy number per input/output protocol app node pair, limited to sequences that carry a chromosome value in TranscriptAttributes. */ CREATE TABLE ChrCopyNumbers AS + SELECT DISTINCT ta.na_sequence_id + , ta.chromosome + , ccn.chr_copy_number AS ploidy + , io.input_pan_id + , io.output_pan_id + FROM apidb.ChrCopyNumber ccn + , TranscriptAttributes ta + , PANIo io + WHERE ta.na_sequence_id = ccn.na_sequence_id + AND ta.chromosome IS NOT NULL + AND ccn.protocol_app_node_id = io.output_pan_id + +
+:DECLARE_PARTITION; + + diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql new file mode 100644 index 0000000000..ab77efc977 --- /dev/null +++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql @@ -0,0 +1,16 @@ + + + CREATE INDEX ChrCN_ix + ON ChrCopyNumbers (input_pan_id, na_sequence_id) + + + ; + + + + CREATE INDEX ChrCN_output + ON ChrCopyNumbers (output_pan_id) + + + ; + + diff --git a/Model/lib/psql/webtables/MO/CodingSequence.psql b/Model/lib/psql/webtables/MO/CodingSequence.psql new file mode 100644 index 0000000000..e1560b855f --- /dev/null +++ b/Model/lib/psql/webtables/MO/CodingSequence.psql @@ -0,0 +1,14 @@ +:CREATE_AND_POPULATE + + + /* Substring of the spliced transcript sequence running from translation_start through translation_stop inclusive (length = stop - start + 1). */ CREATE TABLE CodingSequence AS + SELECT ta.source_id, ta.project_id, + SUBSTR(sns.sequence, tf.translation_start::INTEGER, + tf.translation_stop::INTEGER - tf.translation_start::INTEGER + 1) as sequence + FROM TranscriptAttributes ta, dots.SplicedNaSequence sns, dots.TranslatedAaFeature tf + WHERE ta.source_id = sns.source_id + AND ta.na_feature_id = tf.na_feature_id + + +:DECLARE_PARTITION; + + diff --git a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql new file mode 100644 index 0000000000..26d8f1c327 --- /dev/null +++ b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql @@ -0,0 +1,7 @@ + + + create index CodSeq_ix on CodingSequence (source_id, project_id) + + + ; + + diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql new file mode 100644 index 0000000000..9d7b830c6d --- /dev/null +++ b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql @@ -0,0 +1,23 @@ +:CREATE_AND_POPULATE + + + /* One example source_id per dataset name: the profile row ranked first by chromosome_order_num and then by profile_as_string descending. */ CREATE TABLE DatasetExampleSourceId AS + WITH profiles AS ( + SELECT p.source_id, + ga.project_id, + ga.sequence_id, + d.name, + row_number() over(partition by d.name + order by ga.chromosome_order_num, p.profile_as_string desc) as rn + FROM Profile
p + INNER JOIN sres.ExternalDatabase d ON p.dataset_name = d.name + LEFT JOIN GeneAttributes ga ON p.source_id = ga.source_id + WHERE p.profile_as_string is not null + ) + SELECT p.source_id as example_source_id, p.project_id, p.sequence_id, p.name as dataset + FROM profiles p + WHERE p.rn = 1 + + +:DECLARE_PARTITION; + + diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId_ix.psql b/Model/lib/psql/webtables/MO/DatasetExampleSourceId_ix.psql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Model/lib/psql/webtables/MO/EqtlSpan.psql b/Model/lib/psql/webtables/MO/EqtlSpan.psql new file mode 100644 index 0000000000..10215fca8e --- /dev/null +++ b/Model/lib/psql/webtables/MO/EqtlSpan.psql @@ -0,0 +1,25 @@ +:CREATE_AND_POPULATE + + + /* Highest LOD score (mantissa * 10^exponent) per gene, haplotype block and block location, keeping only associations that score at least 1.5. Spaces in organism are replaced with plus signs. */ create table eqtlSpan as + SELECT gene_source_id, project_id, haplotype_block_name as hapblock_id, sequence_id, + start_min, end_max, start_max, end_min, + max(score) as lod_score, organism + FROM (SELECT ga.gene_source_id, ga.project_id, gls.haplotype_block_name, + ens.source_id as sequence_id, nl.start_min, nl.end_max, nl.start_max, nl.end_min, + gls.lod_score_mant * power(10::double precision, gls.lod_score_exp) as score, + replace (ga.organism, ' ', '+') as organism + FROM dots.ChromosomeElementFeature cef, apidb.NAFeatureHaploblock gls, + dots.ExternalNaSequence ens, dots.NaLocation nl, TranscriptAttributes ga + WHERE gls.na_feature_id = ga.gene_na_feature_id + AND cef.name = gls.haplotype_block_name + AND nl.na_feature_id = cef.na_feature_id + AND cef.na_sequence_id = ens.na_sequence_id + AND (gls.lod_score_mant * power(10::double precision, gls.lod_score_exp)) >= 1.5 + ) t + GROUP BY gene_source_id, project_id, sequence_id, haplotype_block_name, + start_min, end_max, start_max, end_min, organism + + +:DECLARE_PARTITION; + + diff --git a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql new file mode 100644 index 0000000000..ef659c7e2a --- /dev/null +++
b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql @@ -0,0 +1,8 @@ + + + create index eqtlSpan_ix + on eqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score) + + + ; + + diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql new file mode 100644 index 0000000000..1985445f0a --- /dev/null +++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql @@ -0,0 +1,68 @@ + + + /* EST BLAT alignments whose target span overlaps a gene span on the same sequence by at least one base. NOTE(review): query_bases_aligned / length * 100 truncates if both columns are integer typed - confirm the column types. NOTE(review): this statement references TranscriptAttributes with an apidbtuning schema prefix while the other statements in this patch reference it unqualified - confirm which is intended. */ CREATE UNLOGGED TABLE EstAlignmentGene AS + SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession, + e.library_id, ba.query_taxon_id, ba.target_na_sequence_id, + ba.target_taxon_id, ba.percent_identity, ba.is_consistent, + ba.is_best_alignment, ba.is_reversed, ba.target_start, ba.target_end, + ga.sequence_id AS target_sequence_source_id, + least(ba.target_end, ga.gene_end_max) + - greatest(ba.target_start, ga.gene_start_min) + 1 + AS est_gene_overlap_length, + ba.query_bases_aligned / (query_sequence.length) + * 100 AS percent_est_bases_aligned, + ga.gene_source_id AS gene + FROM dots.BlatAlignment ba, dots.Est e, + apidbtuning.TranscriptAttributes ga, + dots.NaSequence query_sequence + WHERE e.na_sequence_id = ba.query_na_sequence_id + AND ga.na_sequence_id = ba.target_na_sequence_id + AND least(ba.target_end, ga.gene_end_max) - greatest(ba.target_start, ga.gene_start_min) >= 0 + AND query_sequence.na_sequence_id = ba.query_na_sequence_id + + ; + + + + /* Same column layout as EstAlignmentGene (the empty first UNION leg fixes the column types) for EST alignments that appear in no EstAlignmentGene row; overlap length and gene are NULL. */ CREATE UNLOGGED TABLE EstAlignmentNoGene AS + SELECT * from EstAlignmentGene WHERE 1=0 UNION /* define datatype for null column */ + SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession, + e.library_id, ba.query_taxon_id, ba.target_na_sequence_id, + ba.target_taxon_id, ba.percent_identity, ba.is_consistent, + ba.is_best_alignment, ba.is_reversed, ba.target_start, ba.target_end, + sequence.source_id AS target_sequence_source_id, + NULL AS est_gene_overlap_length, + ba.query_bases_aligned /
(query_sequence.length) + * 100 AS percent_est_bases_aligned, + NULL AS gene + FROM dots.BlatAlignment ba, dots.Est e, dots.AssemblySequence aseq, + dots.NaSequence sequence, dots.NaSequence query_sequence + WHERE e.na_sequence_id = ba.query_na_sequence_id + AND e.na_sequence_id = query_sequence.na_sequence_id + AND aseq.na_sequence_id = ba.query_na_sequence_id + AND ba.target_na_sequence_id = sequence.na_sequence_id + AND ba.blat_alignment_id IN + ( /* set of blat_alignment_ids not in in first leg of UNION */ + /* (because they overlap no genes) */ + SELECT ba.blat_alignment_id + FROM dots.BlatAlignment ba, dots.NaSequence query_sequence, + sres.OntologyTerm so + WHERE query_sequence.na_sequence_id = ba.query_na_sequence_id + AND query_sequence.sequence_ontology_id = so.ontology_term_id + AND so.name = 'EST' + EXCEPT + SELECT blat_alignment_id FROM EstAlignmentGene) + + ; + +:CREATE_AND_POPULATE + + + /* Final table: all rows of both staging tables. The two inputs hold disjoint alignment ids by construction of the EXCEPT above; UNION also removes any exact duplicate rows. */ CREATE TABLE EstAlignmentGeneSummary AS + SELECT * FROM EstAlignmentNoGene + UNION + SELECT * FROM EstAlignmentGene + + +:DECLARE_PARTITION; + + diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql new file mode 100644 index 0000000000..6dec9178b5 --- /dev/null +++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql @@ -0,0 +1,20 @@ + + + create index EstSumm_libOverlap_ix + ON EstAlignmentGeneSummary + (library_id, percent_identity, is_consistent, + est_gene_overlap_length, percent_est_bases_aligned) + + + ; + + + + create index EstSumm_estSite_ix + ON EstAlignmentGeneSummary + (target_sequence_source_id, target_start, target_end, + library_id) + + + ; + + diff --git a/Model/lib/psql/webtables/MO/EstAttributes.psql b/Model/lib/psql/webtables/MO/EstAttributes.psql new file mode 100644 index 0000000000..17da985e7f --- /dev/null +++ b/Model/lib/psql/webtables/MO/EstAttributes.psql @@ -0,0 +1,51 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE EstAttributes AS + SELECT +
cast(apidb.project_id(tn.name) as varchar(20)) as project_id, + ens.source_id, + e.seq_primer AS primer, + ens.a_count, + ens.c_count, + ens.g_count, + ens.t_count, + (ens.length - (ens.a_count + ens.c_count + ens.g_count + ens.t_count)) + AS other_count, + ens.length, + replace(l.dbest_name, '''', '-') as dbest_name, + /* vector and stage: NULL, empty or whitespace-only library values become unknown. The replacement argument must be an empty string, not null, because regexp_replace returns NULL for every input when any argument is NULL; nullif then maps the emptied string to NULL for coalesce. */ coalesce(nullif(regexp_replace(l.vector, '^\s+$', ''), ''), 'unknown') AS vector, + coalesce(nullif(regexp_replace(l.stage, '^\s+$', ''), ''), 'unknown') AS stage, + SUBSTR(CASE + WHEN tn.name = 'Giardia lamblia' THEN 'Giardia Assemblage A isolate WB' + ELSE tn.name + END, 1, 100) AS organism, + taxon.ncbi_tax_id, + ed.name AS external_db_name, + coalesce(best.best_alignment_count, 0) AS best_alignment_count, + l.library_id, replace(l.dbest_name, '''', '-') as library_dbest_name + FROM dots.Est e, dots.Library l, sres.Taxon, sres.OntologyTerm oterm, + sres.TaxonName tn, sres.ExternalDatabase ed, + sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens + LEFT JOIN + (select query_na_sequence_id,max(ct) as best_alignment_count + from ( + SELECT query_na_sequence_id, COUNT(*) AS ct + FROM dots.BlatAlignment ba + WHERE is_best_alignment = 1 + GROUP BY target_external_db_release_id,query_na_sequence_id) t + group by query_na_sequence_id + ) best ON ens.na_sequence_id = best.query_na_sequence_id + WHERE e.na_sequence_id = ens.na_sequence_id + AND e.library_id = l.library_id + AND ens.taxon_id = tn.taxon_id + AND ens.taxon_id = taxon.taxon_id + AND tn.name_class='scientific name' + AND ens.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + AND ens.sequence_ontology_id = oterm.ontology_term_id + AND oterm.name = 'EST' + + +:DECLARE_PARTITION; + + diff --git a/Model/lib/psql/webtables/MO/EstAttributes_ix.psql b/Model/lib/psql/webtables/MO/EstAttributes_ix.psql new file mode 100644 index 0000000000..3708681405 --- /dev/null +++ b/Model/lib/psql/webtables/MO/EstAttributes_ix.psql @@ -0,0 +1,7 @@ + + +
create unique index EstAttr_source_id ON EstAttributes (source_id) + + + ; + + diff --git a/Model/lib/psql/webtables/MO/EstSequence.psql b/Model/lib/psql/webtables/MO/EstSequence.psql new file mode 100644 index 0000000000..9dc3effb71 --- /dev/null +++ b/Model/lib/psql/webtables/MO/EstSequence.psql @@ -0,0 +1,16 @@ +:CREATE_AND_POPULATE + + + /* Sequence text of every external NA sequence whose sequence ontology term is named EST, with a project id derived from the scientific taxon name. */ CREATE TABLE EstSequence AS + SELECT ens.source_id, + cast(apidb.project_id(tn.name) as varchar(20)) as project_id, + ens.sequence + FROM dots.ExternalNaSequence ens, sres.OntologyTerm oterm, sres.TaxonName tn + WHERE oterm.name = 'EST' + AND oterm.ontology_term_id = ens.sequence_ontology_id + AND ens.taxon_id = tn.taxon_id + AND tn.name_class = 'scientific name' + + +:DECLARE_PARTITION; + + diff --git a/Model/lib/psql/webtables/MO/EstSequence_ix.psql b/Model/lib/psql/webtables/MO/EstSequence_ix.psql new file mode 100644 index 0000000000..b7010a62aa --- /dev/null +++ b/Model/lib/psql/webtables/MO/EstSequence_ix.psql @@ -0,0 +1,7 @@ + + + create index EstSeq_ix on EstSequence (source_id, project_id) + + + ; + + diff --git a/Model/lib/psql/webtables/MO/GeneAttributes.psql b/Model/lib/psql/webtables/MO/GeneAttributes.psql new file mode 100644 index 0000000000..8c78e847c9 --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneAttributes.psql @@ -0,0 +1,113 @@ +:CREATE_AND_POPULATE + + + /* Gene-level projection of TranscriptAttributes. SELECT DISTINCT collapses transcript rows, which works only while every selected column is identical across the transcripts of a gene; the unique index on source_id created for this table relies on that. */ CREATE TABLE :ORG_ABBREVGeneAttributes AS + SELECT DISTINCT project_id + , ta.gene_source_id AS source_id + , gene_na_feature_id AS na_feature_id + , na_sequence_id + , is_reversed + , gene_start_min AS start_min + , gene_end_max AS end_max + , CASE strand WHEN 'forward' THEN '+' WHEN 'reverse' THEN '-' ELSE null END as strand_plus_minus + , sequence_id + , gene_name AS name + , COALESCE(aggregates.product, aggregates.transcript_product) as old_product + , COALESCE(gp.product, 'unspecified product') as product + , gene_type + , gene_ebi_biotype + , gene_id + , aggregates.is_pseudo + , organism + , species + , taxon_id + , species as genus_species + , strain
+ , ncbi_tax_id + , so_id + , so_term_name + , so_term_definition + , so_version + , anticodon + , external_db_name + , external_db_version + , external_db_rls_id + , chromosome + , chromosome_order_num + , sequence_type + , gene_transcript_count AS transcript_count + , gene_exon_count as exon_count + , gene_previous_ids as previous_ids + , is_deprecated + , gene_paralog_number as paralog_number + , gene_ortholog_number as ortholog_number + , gene_context_start as context_start + , gene_context_end as context_end + , orthomcl_name + , gene_total_hts_snps as total_hts_snps + , gene_hts_nonsynonymous_snps as hts_nonsynonymous_snps + , gene_hts_stop_codon_snps as hts_stop_codon_snps + , gene_hts_noncoding_snps as hts_noncoding_snps + , gene_hts_synonymous_snps as hts_synonymous_snps + , gene_hts_nonsyn_syn_ratio as hts_nonsyn_syn_ratio + , comment_string + , uniprot.uniprot_id + , uniprot.uniprot_id_internal + , gene_entrez_id as entrez_id + , representative_transcript + , gene_zoom_context_start as zoom_context_start + , gene_zoom_context_end as zoom_context_end + , /* typed NULL placeholder; filled in by the UPDATE that follows the CREATE TABLE */ cast (null as numeric) as strain_count + , ta.gene_locations as locations + FROM :ORG_ABBREVTranscriptAttributes ta + /* aggregates: per gene, the minimum is_pseudo and gene_product plus a comma-joined transcript product list truncated to 240 characters */ INNER JOIN ( + SELECT gene_source_id, MIN(is_pseudo) AS is_pseudo, MIN(gene_product) AS product, + substr(STRING_AGG(transcript_product, ',' order by transcript_product), 1, 240) as transcript_product + FROM :ORG_ABBREVTranscriptAttributes + GROUP BY gene_source_id + ) aggregates ON ta.gene_source_id = aggregates.gene_source_id + LEFT JOIN ( + SELECT na_feature_id, + substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id, + substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal + FROM ( + SELECT distinct t.parent_id as na_feature_id, dr.primary_identifier as uniprot_id + FROM sres.DbRef dr, dots.DbRefNaFeature x, dots.Transcript t, + sres.ExternalDatabase d, sres.ExternalDatabaseRelease r + WHERE dr.db_ref_id = x.DB_REF_ID + AND
(x.na_feature_id = t.na_feature_id OR x.na_feature_id = t.parent_id) + AND dr.external_database_release_id = r.external_database_release_id + AND r.external_database_id = d.external_database_id + AND (d.name like '%uniprot_dbxref_RSRC' + OR d.name like '%dbxref_gene2Uniprot_RSRC' + OR d.name = 'Links to Uniprot Genes' + OR d.name like '%_dbxref_uniprot_from_annotation_RSRC') + ) t + GROUP BY na_feature_id + ) uniprot ON ta.gene_na_feature_id = uniprot.na_feature_id + LEFT JOIN :ORG_ABBREVGeneProduct gp ON ta.gene_source_id = gp.source_id + ORDER BY ta.gene_source_id + + +:DECLARE_PARTITION; + + + + CREATE TABLE :ORG_ABBREVSpeciesInfo as + SELECT genus_species, count(distinct organism) as strain_count + FROM :ORG_ABBREVGeneAttributes + GROUP BY genus_species + + ; + + + + UPDATE :ORG_ABBREVGeneAttributes ga + SET strain_count = ( + SELECT strain_count + FROM :ORG_ABBREVSpeciesInfo si + WHERE si.genus_species = ga.genus_species + ) + + ; + diff --git a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql new file mode 100644 index 0000000000..a63551450d --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql @@ -0,0 +1,90 @@ + + + CREATE UNIQUE INDEX GeneAttr_srcPrj + ON :ORG_ABBREVGeneAttributes (source_id) + + + ; + + + + CREATE INDEX GeneAttr_exon_ix + ON :ORG_ABBREVGeneAttributes (exon_count, source_id, project_id) + + + ; + + + + CREATE INDEX GeneAttr_loc_ix + ON :ORG_ABBREVGeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated) + + + ; + + + + CREATE INDEX GeneAttr_feat_ix + ON :ORG_ABBREVGeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed) + + + ; + + + + CREATE INDEX GeneAttr_orthoname_ix ON :ORG_ABBREVGeneAttributes ( + orthomcl_name, source_id, taxon_id, gene_type, na_feature_id, + na_sequence_id, start_min, end_max, organism, species, + product, project_id + ) + + ; + + + + CREATE INDEX GeneAttr_ortholog_ix + ON 
:ORG_ABBREVGeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id) + + + ; + + + + CREATE INDEX GeneAttr_orgsrc_ix + ON :ORG_ABBREVGeneAttributes (organism, source_id, na_sequence_id, start_min, end_max) + + + ; + + + + CREATE INDEX GeneAttr_prjsrc_ix + ON :ORG_ABBREVGeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0)) + + + ; + + + + CREATE INDEX GeneAttr_txid_ix + ON :ORG_ABBREVGeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id) + + + ; + + + + CREATE INDEX GeneAttr_ids_ix + ON :ORG_ABBREVGeneAttributes (na_feature_id, source_id, project_id) + + + ; + + + + CREATE INDEX GeneAttr_loc_intjunc_ix + ON :ORG_ABBREVGeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX) + + + ; + diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql new file mode 100644 index 0000000000..ac518347bf --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql @@ -0,0 +1,29 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE GeneCopyNumbers AS + SELECT DISTINCT ta.project_id + , ta.source_id + , ta.gene_source_id + , REGEXP_REPLACE(pan.name, '_[A-Za-z0-9]+ (.+)$', '') AS strain + , gcn.haploid_number AS raw_estimate + , gcn.ref_copy_number AS ref_cn + , CASE WHEN (gcn.haploid_number < 0.01) THEN 0 + WHEN (0.01 < gcn.haploid_number AND gcn.haploid_number < 1.85) THEN 1 + ELSE round(gcn.haploid_number) END AS haploid_number + , ta.chromosome + , ta.na_sequence_id + , io.input_pan_id + , io.output_pan_id + FROM apidb.genecopynumber gcn + , study.protocolappnode pan + , TranscriptAttributes ta + , PANIo io + WHERE gcn.protocol_app_node_id = pan.protocol_app_node_id + AND gcn.na_feature_id = ta.gene_na_feature_id + AND gcn.protocol_app_node_id = io.output_pan_id + AND (ta.gene_type = 'protein coding' or ta.gene_type = 'protein coding gene') + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql 
b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql new file mode 100644 index 0000000000..7acf69d823 --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql @@ -0,0 +1,8 @@ + + + CREATE INDEX GeneCN_ix + ON GeneCopyNumbers (input_pan_id, na_sequence_id) + + + ; + diff --git a/Model/lib/psql/webtables/MO/GeneGoTable.psql b/Model/lib/psql/webtables/MO/GeneGoTable.psql new file mode 100644 index 0000000000..b83c188e7f --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneGoTable.psql @@ -0,0 +1,26 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE GeneGoTable AS + SELECT source_id, project_id, go_id, + string_agg(transcript_source_id, ', ' order by transcript_source_id) as transcript_ids, + is_not, + max(go_term_name) as go_term_name, ontology, source, evidence_code, + reference, evidence_code_parameter, sort_key + FROM (SELECT DISTINCT ggt.gene_source_id as source_id, ga.project_id, + replace(ggt.go_id, 'GO_', 'GO:') as go_id, + ggt.transcript_source_id, + case when ggt.is_not = 'not' then 'Is not' else '' end AS is_not, + ggt.go_term_name, ggt.ontology, ggt.source, ggt.evidence_code, + ggt.reference, ggt.evidence_code_parameter, + substr(ggt.ontology, 1, 1) || replace(ggt.go_id, 'GO_', 'GO:') as sort_key + FROM GeneGoTerms ggt, GeneAttributes ga + WHERE ggt.gene_source_id = ga.source_id + ) t + GROUP BY source_id, project_id, go_id, is_not, ontology, + source, evidence_code, reference, evidence_code_parameter, sort_key + ORDER BY source_id, ontology, go_id + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql new file mode 100644 index 0000000000..de449a0e54 --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql @@ -0,0 +1,9 @@ + + + create index ggtab_ix ON GeneGoTable + (source_id, project_id, go_id, transcript_ids, is_not, go_term_name, + ontology, source, evidence_code, reference, evidence_code_parameter, sort_key) + + + ; + diff --git 
a/Model/lib/psql/webtables/MO/GeneGoTerms.psql b/Model/lib/psql/webtables/MO/GeneGoTerms.psql new file mode 100644 index 0000000000..4cd39239a2 --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneGoTerms.psql @@ -0,0 +1,39 @@ +:CREATE_AND_POPULATE + + + create table :ORG_ABBREVGeneGoTerms as + with root_term + as (select ontology_term_id, + cast(initcap(replace(name, '_', ' ')) as varchar(20)) as ontology + from sres.OntologyTerm + where source_id in ('GO_0008150','GO_0003674','GO_0005575')) + select gf.source_id as gene_source_id, t.source_id as transcript_source_id, taf.aa_sequence_id, + cast (CASE ga.is_not WHEN 0 THEN '' WHEN 1 THEN 'not' ELSE ga.is_not::varchar END as varchar(3)) as is_not, ns.taxon_id, + cast (gt.source_id as varchar(20)) as go_id, + gt.ontology_term_id as go_term_id, rt.ontology, + cast(gt.name as varchar(250)) as go_term_name, + cast(gail.name as varchar(24)) as source, + cast(gec.name as varchar(12)) as evidence_code, + cast(gaiec.reference as varchar(250)) as reference, + cast(gaiec.evidence_code_parameter as varchar(80))as evidence_code_parameter + from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf, dots.GoAssociation ga, + dots.GoAssociationInstance gai, dots.GoAssociationInstanceLoe gail, + dots.GoAssocInstEvidCode gaiec, sres.OntologyTerm gec, dots.NaSequence ns, + sres.OntologyTerm gt LEFT JOIN root_term rt ON gt.ancestor_term_id = rt.ontology_term_id + where t.parent_id = gf.na_feature_id + and gf.na_sequence_id = ns.na_sequence_id + and (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0) + and t.na_feature_id = taf.na_feature_id + and taf.aa_sequence_id = ga.row_id + and ga.table_id = (select table_id + from core.TableInfo + where name = 'TranslatedAASequence') + and ga.go_term_id = gt.ontology_term_id + and ga.go_association_id = gai.go_association_id + and gai.go_assoc_inst_loe_id = gail.go_assoc_inst_loe_id + and gai.go_association_instance_id = gaiec.go_association_instance_id + and 
gaiec.go_evidence_code_id = gec.ontology_term_id + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql new file mode 100644 index 0000000000..8bb63eb7e4 --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql @@ -0,0 +1,10 @@ + + + create index ggt_ix ON :ORG_ABBREVGeneGoTerms + (gene_source_id, transcript_source_id, ontology, go_id, go_term_id, + go_term_name, source, evidence_code, reference, + evidence_code_parameter, aa_sequence_id, is_not) + + + ; + diff --git a/Model/lib/psql/webtables/MO/GeneId.psql b/Model/lib/psql/webtables/MO/GeneId.psql new file mode 100644 index 0000000000..41e89c83f0 --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneId.psql @@ -0,0 +1,265 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE :ORG_ABBREVGeneId AS + SELECT substr(mapping.id, 1, 100) as id, mapping.gene, cast (0 as NUMERIC(1)) as unique_mapping, + SUBSTR(string_agg(distinct union_member,'; ' order by union_member), 1, 100) as union_member, + SUBSTR(string_agg(distinct database_name,'; ' order by database_name), 1, 200) as database_name + FROM (SELECT substr(t.protein_id, 1, nullif(position('.' IN t.protein_id) - 1, -1)) AS id, + gf.source_id AS gene, + 'Transcript.protein_id before dot' as union_member, ed.name as database_name /* dots.Transcript.protein_id, trimmed at period */ + FROM dots.Transcript t, dots.GeneFeature gf, + sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr + WHERE t.parent_id = gf.na_feature_id + AND substr(t.protein_id, 1, nullif(position('.' 
IN t.protein_id) - 1, -1)) IS NOT NULL + AND gf.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + UNION + SELECT t.protein_id AS id, + gf.source_id AS gene, + 'Transcript.protein_id' as union_member, ed.name as database_name /* dots.Transcript.protein_id */ + FROM dots.Transcript t, dots.GeneFeature gf, + sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr + WHERE t.parent_id = gf.na_feature_id + AND t.protein_id IS NOT NULL + AND gf.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + UNION + SELECT dr.primary_identifier AS id, + gf.source_id AS gene, + 'DbRef.primary_identifier' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */ + FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf, + sres.DbRef dr, sres.ExternalDatabaseRelease edr, + sres.ExternalDatabase ed + WHERE dr.primary_identifier IS NOT NULL + AND gf.na_feature_id = drnf.na_feature_id + AND drnf.db_ref_id = dr.db_ref_id + AND dr.external_database_release_id + = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + -- skip NRDB gb/ref cross-references only when the identifier has no non-digit character (regex operator !~ replaces REGEXP_LIKE) + -- NOTE(review): the id_type test below also drops releases whose id_type is NULL - confirm that is intended + AND NOT (ed.name in ('NRDB_gb_dbXRefBySeqIdentity','NRDB_ref_dbXRefBySeqIdentity') + AND dr.primary_identifier !~ '\D') + AND NOT edr.id_type = 'synonym' + UNION + SELECT dr.primary_identifier AS id, + gf.source_id AS gene, + 'DbRef.primary_identifier on Transcript' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */ + FROM dots.GeneFeature gf, dots.Transcript t, dots.DbRefNaFeature drnf, + sres.DbRef dr, sres.ExternalDatabaseRelease edr, + sres.ExternalDatabase ed + WHERE dr.primary_identifier IS NOT NULL + AND gf.na_feature_id = t.parent_id + AND t.na_feature_id = drnf.na_feature_id + AND drnf.db_ref_id = dr.db_ref_id + AND 
dr.external_database_release_id + = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + -- skip NRDB gb/ref cross-references only when the identifier has no non-digit character; + -- written with the Postgres regex operator !~ in place of REGEXP_LIKE + AND NOT (ed.name in ('NRDB_gb_dbXRefBySeqIdentity','NRDB_ref_dbXRefBySeqIdentity') + AND dr.primary_identifier !~ '\D') + UNION + SELECT dr.primary_identifier AS id, + gf.source_id AS gene, + 'DbRef.primary_identifier on Gene' as union_member, ed.name as database_name + FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf, + sres.DbRef dr, sres.ExternalDatabaseRelease edr, + sres.ExternalDatabase ed + WHERE dr.primary_identifier IS NOT NULL + AND gf.na_feature_id = drnf.na_feature_id + AND drnf.db_ref_id = dr.db_ref_id + AND dr.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + AND ed.name ='RefSeq_gene_name' + UNION + SELECT dr.primary_identifier AS id, + gf.source_id AS gene, + 'VectorBase alternate names' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */ + FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf, + sres.DbRef dr, sres.ExternalDatabaseRelease edr, + sres.ExternalDatabase ed + WHERE dr.primary_identifier IS NOT NULL + AND gf.na_feature_id = drnf.na_feature_id + AND drnf.db_ref_id = dr.db_ref_id + AND dr.external_database_release_id + = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + and ed.name in ('VB_Community_Annotation', 'VB_Community_Symbol') /* vectorbase alt names */ + UNION + SELECT dr.primary_identifier AS id, + gf.source_id AS gene, + 'synonym' as union_member, ed.name as database_name + FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf, + sres.DbRef dr, sres.ExternalDatabaseRelease edr, + sres.ExternalDatabase ed + WHERE dr.primary_identifier IS NOT NULL + AND gf.na_feature_id = drnf.na_feature_id + AND drnf.db_ref_id = dr.db_ref_id + AND 
dr.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + AND edr.id_type = 'synonym' + UNION + SELECT dr.secondary_identifier AS id, + gf.source_id AS gene, + 'DbRef.secondary_identifier' as union_member, ed.name as database_name /* sres.DbRef.secondary_identifier */ + FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf, + sres.DbRef dr, sres.ExternalDatabaseRelease edr, + sres.ExternalDatabase ed + WHERE dr.secondary_identifier IS NOT NULL + AND gf.na_feature_id = drnf.na_feature_id + AND drnf.db_ref_id = dr.db_ref_id + AND dr.external_database_release_id + = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + AND ed.name IN ('NRDB_gb_dbXRefBySeqIdentity', + 'NRDB_pdb_dbXRefBySeqIdentity', + 'NRDB_ref_dbXRefBySeqIdentity', + 'NRDB_sp_dbXRefBySeqIdentity', + 'Predicted protein structures','Pf_predictedProteinStructures_RSRC', + 'GenBank') + UNION + SELECT dr.primary_identifier AS id, + gf.source_id AS gene, + 'genbank DbRef.primary_identifier' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier for Genbank records */ + FROM dots.GeneFeature gf, dots.Transcript t, dots.DbRefNaSequence drns, + sres.DbRef dr, sres.ExternalDatabaseRelease edr, + sres.ExternalDatabase ed + WHERE gf.na_feature_id = t.parent_id + AND t.na_sequence_id = drns.na_sequence_id + AND drns.db_ref_id = dr.db_ref_id + AND dr.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + AND ed.name = 'GenBank' + UNION + SELECT pred_loc.feature_source_id AS id, + gene_loc.feature_source_id AS gene, + 'overlapping predicted gene source_id' as union_member, ed.name as database_name /* dots.GeneFeature.source_id for predicted genes that overlap */ + FROM apidb.FeatureLocation gene_loc, apidb.FeatureLocation pred_loc, + sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed + WHERE pred_loc.feature_type = 
'GenePrediction' + AND gene_loc.feature_type = 'GeneFeature' + AND pred_loc.na_sequence_id = gene_loc.na_sequence_id + AND gene_loc.start_min <= pred_loc.end_max + AND gene_loc.end_max >= pred_loc.start_min + AND pred_loc.is_reversed = gene_loc.is_reversed + AND pred_loc.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + UNION + SELECT ng.name AS id, gf.source_id AS gene, + 'NaGene' as union_member, ed.name as database_name /* dots.NaGene.name */ + FROM dots.GeneFeature gf, dots.NaFeatureNaGene nfng, dots.NaGene ng, + sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed + WHERE gf.na_feature_id = nfng.na_feature_id + AND ng.na_gene_id = nfng.na_gene_id + AND gf.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + UNION + SELECT source_id AS id, source_id AS gene, + 'same ID' as union_member, ed.name as database_name /* same ID (reflexive mapping) */ + FROM dots.GeneFeature gf, + sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed + WHERE gf.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + UNION + SELECT n.name AS id, gf.source_id AS gene, + 'gene name' as union_member, d.name as database_name -- apidb.GeneFeatureName.name + from dots.genefeature gf, sres.ExternalDatabaseRelease r, sres.ExternalDatabase d, + ( select na_feature_id, name + from apidb.GeneFeatureName + where is_preferred = 1 + EXCEPT + -- suppress gene/name associations from the *DELETED_RSRC databases + select gfn.na_feature_id, gfn.name + from apidb.GeneFeatureName gfn, + sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr + where gfn.external_database_release_id = edr.external_database_release_id + and ed.external_database_id = edr.external_database_id + and ed.name like '%DELETED_RSRC' + ) n + where n.na_feature_id = gf.na_feature_id + and 
gf.external_database_release_id = r.external_database_release_id + and r.external_database_id = d.external_database_id + UNION + select dr.primary_identifier as id, + gf.source_id as gene, + 'AA feature DbRef primary ID' as union_member, + ed.name as database_name /* DbRef.primary_identifier mapped through DbRefAaFeature */ + from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf, + dots.DbRefAaFeature draf, sres.DbRef dr, + sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed + where gf.na_feature_id = t.parent_id + and t.na_feature_id = taf.na_feature_id + and taf.aa_feature_id = draf.aa_feature_id + and draf.db_ref_id = dr.db_ref_id + and dr.external_database_release_id = edr.external_database_release_id + and edr.external_database_id = ed.external_database_id + and ed.name + not in ('INTERPRO', 'PFAM', 'PIRSF', 'PRODOM', 'PROSITEPROFILES', + 'SMART', 'SUPERFAMILY', 'TIGRFAM', 'CDD','HAMAP','HMMPANTHER', + 'PRINTS','SCANPROSITE','SFLD') + ) mapping, + dots.GeneFeature gf, dots.NaSequence ns + WHERE mapping.gene = gf.source_id + AND gf.na_sequence_id = ns.na_sequence_id + AND (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0) + AND (gf.is_predicted != 1 OR gf.is_predicted is null) + GROUP BY mapping.id, mapping.gene + + +:DECLARE_PARTITION; + + + + INSERT INTO :ORG_ABBREVGeneId + (id, gene, unique_mapping, union_member, database_name) + WITH munge + AS (SELECT DISTINCT + regexp_replace(id, '\.\d\d?$', '') as id, + gene, unique_mapping, union_member, database_name + FROM :ORG_ABBREVGeneId + -- only IDs ending in a one- or two-digit ".N" suffix can yield a new base ID + WHERE id ~ '\.\d\d?$' + ) + SELECT id, gene, 0 as unique_mapping, 'base ID' as union_member, database_name + FROM munge + WHERE NOT EXISTS (SELECT 1 FROM :ORG_ABBREVGeneId gi WHERE gi.id = munge.id) /* NOT EXISTS stays correct when GeneId holds a NULL id; NOT IN would then insert nothing */ + + ; + + + + CREATE UNLOGGED TABLE :ORG_ABBREVOneGeneIds (lower_id) AS + SELECT lower_id + FROM (SELECT DISTINCT lower(id) as lower_id, gene + FROM :ORG_ABBREVGeneId + ) t + GROUP BY lower_id + HAVING count(*) = 1 + + ; + 
+ + + CREATE UNIQUE INDEX gix_pk ON :ORG_ABBREVOneGeneIds (lower_id) + + + ; + + + + UPDATE :ORG_ABBREVGeneId + SET unique_mapping = 1 + WHERE id = gene + + ; + + + + UPDATE :ORG_ABBREVGeneId + SET unique_mapping = 1 + WHERE lower(id) IN (select lower_id from :ORG_ABBREVOneGeneIds) + + ; + diff --git a/Model/lib/psql/webtables/MO/GeneId_ix.psql b/Model/lib/psql/webtables/MO/GeneId_ix.psql new file mode 100644 index 0000000000..ef84acad08 --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneId_ix.psql @@ -0,0 +1,35 @@ + + + CREATE INDEX GeneId_gene_idx ON :ORG_ABBREVGeneId (gene, id) + + + ; + + + + CREATE INDEX GeneId_id_idx ON :ORG_ABBREVGeneId (id, gene) + + + ; + + + + CREATE INDEX GeneId_uniqid_idx ON :ORG_ABBREVGeneId (unique_mapping, id, gene) + + + ; + + + + CREATE INDEX GeneId_lowid_idx ON :ORG_ABBREVGeneId (lower(id), gene) + + + ; + + + + CREATE INDEX GeneId_uniqlowid_idx ON :ORG_ABBREVGeneId (unique_mapping, lower(id), gene) + + + ; + diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats.psql b/Model/lib/psql/webtables/MO/GeneIntJuncStats.psql new file mode 100644 index 0000000000..b8a8e7dbcf --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneIntJuncStats.psql @@ -0,0 +1,25 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE GeneIntJuncStats AS + WITH org_tot AS ( + SELECT + gs.organism, + min(gij.total_unique) as min_annot_score, PERCENTILE_cont(0.005) within group (order by gij.total_unique asc) as perc005_annot_score, + PERCENTILE_cont(0.01) within group (order by gij.total_unique asc) as perc01_annot_score, + min(gij.percent_max) as min_annot_percent_max, PERCENTILE_cont(0.0001) within group (order by gij.percent_max asc) as perc0001_annot_percent_max, + PERCENTILE_cont(0.0005) within group (order by gij.percent_max asc) as perc0005_annot_percent_max, + floor(max(gij.segment_end - gij.segment_start) * 1.25) as max_intron_length + FROM geneintronjunction gij, genomicseqattributes gs + WHERE gs.na_sequence_id = gij.na_sequence_id + AND 
gij.annotated_intron = 'Yes' + GROUP BY gs.organism + ) + SELECT gs.na_sequence_id, gs.source_id, ot.* + FROM genomicseqattributes gs, org_tot ot + WHERE gs.organism = ot.organism + AND gs.na_sequence_id in (SELECT DISTINCT na_sequence_id FROM apidb.intronjunction) + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql new file mode 100644 index 0000000000..d2d463f8a2 --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql @@ -0,0 +1,6 @@ + + + create index GeneIntJuncStat_ix on GeneIntJuncStats (na_sequence_id) + + ; + diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction.psql b/Model/lib/psql/webtables/MO/GeneIntronJunction.psql new file mode 100644 index 0000000000..1f42b544b4 --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneIntronJunction.psql @@ -0,0 +1,223 @@ + + + CREATE UNLOGGED TABLE PanIOgij AS + SELECT i.protocol_app_id, i.protocol_app_node_id as input_pan_id, o.protocol_app_node_id as output_pan_id + FROM study.output o, study.input i + WHERE + o.protocol_app_node_id in ( + SELECT DISTINCT protocol_app_node_id + FROM study.protocolappnode where name like '%junctions%' or name like '%htseq%' + ) + AND o.protocol_app_id = i.protocol_app_id + + ; + + + + CREATE UNLOGGED TABLE annotgij ( + na_sequence_id, + start_min, + end_max, + is_reversed, + feature_type + ) AS + SELECT il.na_sequence_id, il.start_min,il.end_max, il.is_reversed, 'Intron' as feature_type + FROM apidb.IntronLocation il + GROUP by il.na_sequence_id, il.start_min,il.end_max,il.is_reversed + + ; + + + + CREATE UNIQUE INDEX annottmpnew_pk_ix ON annotgij (na_sequence_id,start_min,end_max,is_reversed,feature_type) + + + ; + + + + create table GeneIdLocGIJ ( + na_sequence_id NUMERIC(10), + start_min NUMERIC, + is_reversed NUMERIC, + end_max NUMERIC, + na_feature_id NUMERIC(10), + source_id varchar(100), + total_expression NUMERIC + ) + + ; + + + + CREATE UNIQUE INDEX 
gnattidloc_pk_ix ON GeneIdLocGIJ (na_sequence_id,start_min,is_reversed,end_max,na_feature_id,source_id,total_expression) + + + ; + + + + DO $$ + DECLARE + idlist RECORD; + BEGIN + FOR idlist IN ( SELECT DISTINCT na_sequence_id FROM apidb.intronjunction) + LOOP + INSERT INTO GeneIdLocGIJ ( + SELECT gf.na_sequence_id,l.start_min,l.is_reversed,l.end_max,gf.na_feature_id,gf.source_id, + round(sum(nafe.value)::NUMERIC,2) as total_expression + FROM dots.genefeature gf, dots.nalocation l, namemappinggij je, results.nafeatureexpression nafe + WHERE gf.na_sequence_id = idlist.na_sequence_id + AND l.na_feature_id = gf.na_feature_id + AND gf.na_feature_id = nafe.na_feature_id + AND nafe.protocol_app_node_id = je.exp_pan_id + GROUP BY gf.na_sequence_id,l.start_min,l.is_reversed,l.end_max,gf.na_feature_id,gf.source_id + ); + commit; + END LOOP; + END; + $$ LANGUAGE PLPGSQL; + + ; + + + + create index gnidloc_nafid_ix on GeneIdLocGIJ (na_feature_id) + + ; + + + + create UNLOGGED table GIJtmp ( + NA_SEQUENCE_ID NUMERIC(10), + SEQUENCE_SOURCE_ID VARCHAR(100), + SEGMENT_START NUMERIC, + SEGMENT_END NUMERIC, + TOTAL_UNIQUE NUMERIC, + TOTAL_ISRPM NUMERIC, + IS_REVERSED NUMERIC(1), + INTRON_FEATURE_ID VARCHAR(200), + MATCHES_GENE_STRAND NUMERIC, + GENE_SOURCE_ID VARCHAR(100), + GENE_NA_FEATURE_ID NUMERIC, + ANNOTATED_INTRON VARCHAR(10) + ) + + ; + + + + DO $$ + DECLARE + iter_length numeric := 4999; + i_first_pos numeric := 1; + i_last_pos numeric := i_first_pos + iter_length; + idlist RECORD; + BEGIN + FOR idlist IN ( + SELECT na_sequence_id, length, taxon_id, CASE WHEN step_mult > 500000 THEN 500000 ELSE step_mult END as seq_step_mult + FROM ( + SELECT gs.na_sequence_id, gs.length, gs.taxon_id, 25000 * (1 + floor(gs.length/count(*))) as step_mult + FROM apidb.intronjunction ij, dots.nasequence gs + WHERE gs.na_sequence_id = ij.na_sequence_id + GROUP BY gs.na_sequence_id, gs.length, gs.taxon_id + ) t + ORDER BY taxon_id + ) + LOOP + iter_length := idlist.seq_step_mult; + i_first_pos 
:= 1; + i_last_pos := i_first_pos + iter_length; + WHILE i_first_pos < idlist.length + LOOP + INSERT INTO GIJtmp + SELECT DISTINCT + junc.*, + CASE + WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed + THEN 1 + ELSE 0 + END as matches_gene_strand, + last_value(ga.source_id) over w1 as gene_source_id, + last_value(ga.na_feature_id) over w1 as gene_na_feature_id, + CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron + FROM ( + SELECT ij.na_sequence_id,seq.source_id as sequence_source_id,ij.segment_start,ij.segment_end, + sum(ij.unique_reads) as total_unique, round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm, + ij.is_reversed,seq.source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id + FROM apidb.intronjunction ij, namemappinggij je, dots.nasequence seq + WHERE ij.na_sequence_id = idlist.na_sequence_id + AND ij.segment_start between i_first_pos and i_last_pos + AND ij.na_sequence_id = seq.na_sequence_id + AND ij.unique_reads >= 1 + AND je.junctions_pan_id = ij.protocol_app_node_id + AND je.multiplier < 20 + GROUP BY ij.na_sequence_id,ij.segment_start,ij.segment_end, ij.is_reversed,seq.source_id + ) junc + LEFT JOIN GeneIdLocGIJ ga ON + junc.na_sequence_id = ga.na_sequence_id + AND junc.segment_start >= ga.start_min + AND junc.segment_end <= ga.end_max + AND junc.is_reversed = ga.is_reversed + LEFT JOIN annotgij ag ON + junc.na_sequence_id = ag.na_sequence_id + AND junc.segment_start = ag.start_min + AND junc.segment_end = ag.end_max + AND junc.is_reversed = ag.is_reversed + WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron') + WINDOW w1 AS ( + PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type + ORDER BY ga.total_expression ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ; + commit; + i_first_pos := 
i_last_pos + 1; + i_last_pos := i_first_pos + iter_length; + END LOOP; + END LOOP; + END; + $$ LANGUAGE PLPGSQL; + + ; + + + + create index gijtmp_gnscid_ix on gijtmp (gene_source_id) + + ; + +:CREATE_AND_POPULATE + + + CREATE TABLE GeneIntronJunction AS + SELECT + junc.*, CASE WHEN maxv.gene_source_id is not null and maxv.max_isrpm > 0 THEN round((junc.total_isrpm / maxv.max_isrpm) * 100,2) ELSE null END as percent_max, + CASE WHEN maxv.gene_source_id is not null THEN 1 ELSE 0 END as contained, + CAST (null as numeric(10)) as taxon_id, + cast (null as numeric(10)) as upstream_gene_id, + cast (null as numeric) as upstream_distance, + cast (null as numeric(10)) as downstream_gene_id, + cast (null as numeric) as downstream_distance + FROM + gijtmp junc LEFT JOIN + ( + SELECT gene_source_id,max(total_unique) as max_unique, max(total_isrpm) as max_isrpm + FROM gijtmp + WHERE gene_source_id is not null + GROUP BY gene_source_id + ) maxv ON junc.gene_source_id = maxv.gene_source_id + + +:DECLARE_PARTITION; + + + + UPDATE GeneIntronJunction gij + SET taxon_id + = (SELECT taxon_id + FROM dots.NaSequence + WHERE na_sequence_id = gij.na_sequence_id) + + ; + diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql new file mode 100644 index 0000000000..90f98ad9cf --- /dev/null +++ b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql @@ -0,0 +1,21 @@ + + + create index gijnew_loc_ix on GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed) + + ; + + + + create index gijnew_gnscid_ix on GeneIntronJunction (intron_feature_id) + + ; + + + + create index gijnew_txnloc_ix + on GeneIntronJunction + (taxon_id, na_sequence_id, segment_start, segment_end, is_reversed, + total_unique, total_isrpm, annotated_intron) + + ; + diff --git a/Model/lib/psql/webtables/MO/GeneLocations.psql b/Model/lib/psql/webtables/MO/GeneLocations.psql new file mode 100644 index 0000000000..aae59b60e7 --- /dev/null +++ 
b/Model/lib/psql/webtables/MO/GeneLocations.psql
@@ -0,0 +1,22 @@
+:CREATE_AND_POPULATE
+  -- One row per gene: each GeneFeature location rendered as 'sequence:start..end(strand)' and
+  -- joined with '; ', top-level locations first; ties are broken by the location text so output is stable.
+  CREATE TABLE :ORG_ABBREVGeneLocations AS
+  SELECT source_id,
+         string_agg(location, '; ' order by is_top_level desc, location) as locations
+  FROM (SELECT fl.feature_source_id as source_id, fl.is_top_level,
+               fl.sequence_source_id || ':'
+               || trim(to_char(fl.start_min,'999,999,999')) || '..'
+               || trim(to_char(fl.end_max,'999,999,999')) || '('
+               || CASE coalesce(fl.is_reversed, 0) WHEN 0 THEN '+' WHEN 1 THEN '-' ELSE fl.is_reversed::varchar END
+               || ')' as location
+        FROM apidb.FeatureLocation fl, dots.NaSequence ns
+        WHERE fl.feature_type = 'GeneFeature'
+          AND fl.na_sequence_id = ns.na_sequence_id
+          AND (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0)
+       ) t
+  GROUP BY source_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
new file mode 100644
index 0000000000..80b09a0fa2
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql
@@ -0,0 +1,8 @@
+
+  -- Index on (source_id, locations) of the gene-locations table.
+  create index gloc_ix
+    on :ORG_ABBREVGeneLocations (source_id, locations)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql
new file mode 100644
index 0000000000..0025d7d161
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql
@@ -0,0 +1,47 @@
+:CREATE_AND_POPULATE
+  -- Per (protocol_app_node_id, gene): max / sum / avg of junction unique_reads, and of
+  -- unique_reads * multiplier rounded to 2 places (the *_isrpm columns).
+  CREATE TABLE GeneMaxIntronGIJ (
+    protocol_app_node_id NUMERIC(10),
+    gene_source_id VARCHAR(200),
+    max_unique NUMERIC,
+    max_isrpm NUMERIC,
+    sum_unique NUMERIC,
+    sum_isrpm NUMERIC,
+    avg_unique NUMERIC,
+    avg_isrpm NUMERIC
+  )
+
+
+:DECLARE_PARTITION;
+
+  -- Populate one na_sequence_id at a time, committing after each sequence.
+  -- Junctions are matched to genes lying on the same sequence and strand that fully contain the junction segment.
+  DO $$
+  DECLARE
+    idlist RECORD;
+  BEGIN
+    FOR idlist IN (
+      SELECT DISTINCT na_sequence_id
+      FROM GeneIdLocGIJ
+    )
+    LOOP
+      INSERT INTO GeneMaxIntronGIJ (protocol_app_node_id, gene_source_id, max_unique, max_isrpm, sum_unique, sum_isrpm, avg_unique, avg_isrpm) -- explicit column list: no reliance on column order
+        SELECT j.protocol_app_node_id, ga.source_id as gene_source_id, max(unique_reads) as max_unique, max(round(j.unique_reads * mult.multiplier,2)) as max_isrpm,
+               sum(unique_reads) as sum_unique, sum(round(j.unique_reads * mult.multiplier,2)) as sum_isrpm, avg(unique_reads) as avg_unique, avg(round(j.unique_reads * mult.multiplier,2)) as avg_isrpm
+        FROM apidb.intronjunction j, GeneIdLocGIJ ga, namemappinggij mult
+        WHERE ga.na_sequence_id = idlist.na_sequence_id
+          AND ga.na_sequence_id = j.na_sequence_id
+          AND ga.start_min <= j.segment_start
+          AND ga.end_max >= j.segment_end
+          AND ga.is_reversed = j.is_reversed
+          AND j.protocol_app_node_id = mult.junctions_pan_id
+        GROUP BY j.protocol_app_node_id, ga.source_id
+      ;
+      commit;
+    END LOOP;
+  END;
+  $$ LANGUAGE PLPGSQL;
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
new file mode 100644
index 0000000000..2a734e98a7
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql
@@ -0,0 +1,6 @@
+
+  -- Index on (gene_source_id, protocol_app_node_id).
+  CREATE INDEX GnMxIntGIJ_ix on GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id)
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneModelDump.psql b/Model/lib/psql/webtables/MO/GeneModelDump.psql
new file mode 100644
index 0000000000..698e4fc44f
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneModelDump.psql
@@ -0,0 +1,32 @@
+:CREATE_AND_POPULATE
+  -- One row per distinct gene-model piece (Exon / UTR / CDS / Intron) per gene, with the
+  -- comma-separated, sorted list of transcripts that share that piece. 'ExonFeature' is reported as 'Exon'.
+  CREATE TABLE GeneModelDump as
+  SELECT source_id, project_id, sequence_id, gm_start,gm_end, type, is_reversed,
+         string_agg(transcript_id, ',' ORDER BY transcript_id) AS transcript_ids
+  FROM (
+    SELECT distinct ta.source_id as transcript_id, ta.gene_source_id as source_id, ta.project_id,
+           ta.sequence_id,gm.start_min as gm_start, gm.end_max as gm_end,
+           gm.type, gl.is_reversed
+    FROM
+      apidb.FeatureLocation gl, dots.NaSequence s,
+      TranscriptAttributes ta,
+      (
+        SELECT CASE el.feature_type WHEN 'ExonFeature' THEN 'Exon' ELSE el.feature_type END as type,
+               el.parent_id as na_feature_id, el.start_min as start_min, el.end_max as end_max
+        FROM apidb.FeatureLocation el
+        WHERE el.feature_type in ('ExonFeature','five_prime_UTR', 'three_prime_UTR','CDS','Intron')
+          AND el.is_top_level = 1
+      ) gm
+    WHERE gm.na_feature_id = ta.na_feature_id
+      AND s.na_sequence_id = gl.na_sequence_id
+      AND ta.na_feature_id = gl.na_feature_id
+      AND gl.is_top_level = 1
+  ) t
+  GROUP BY source_id, project_id, sequence_id,
+           gm_start, gm_end, type, is_reversed
+  ORDER BY CASE WHEN is_reversed = 1 THEN -1 * gm_start ELSE gm_start END
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
new file mode 100644
index 0000000000..89fc3c5b8e
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql
@@ -0,0 +1,9 @@
+
+  -- Index over every GeneModelDump column, led by source_id.
+  create index gmd_ix
+    on GeneModelDump
+    (source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql b/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql
new file mode 100644
index 0000000000..c691f5b8ce
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql
@@ -0,0 +1,14 @@
+:CREATE_AND_POPULATE
+  -- Distinct filter names: every species and every organism value present in GeneAttributes
+  -- (UNION removes duplicates across the two sets).
+  CREATE TABLE GeneSummaryFilter AS
+  SELECT CAST(filter_name AS VARCHAR(80)) AS filter_name
+  FROM (SELECT species as filter_name
+        FROM GeneAttributes
+        UNION
+        SELECT organism as filter_name
+        FROM GeneAttributes) t
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GeneSummaryFilter_ix.psql b/Model/lib/psql/webtables/MO/GeneSummaryFilter_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
new file mode 100644
index 0000000000..3b5c7cbe91
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql
@@ -0,0 +1,26 @@
+:CREATE_AND_POPULATE
+  -- Maps identifiers to genomic sequence source_ids (both truncated to 60 characters): each sequence of a
+  -- listed ontology type maps to itself, and each non-null DbRef primary_identifier maps to its sequence.
+  CREATE TABLE GenomicSequenceId AS
+  SELECT DISTINCT substr(id, 1, 60) as id, substr(sequence, 1, 60) AS sequence
+  FROM (
+    SELECT ns.source_id as id, ns.source_id as sequence
+    FROM dots.NaSequence ns, sres.OntologyTerm oterm
+    WHERE ns.sequence_ontology_id = oterm.ontology_term_id
+      AND oterm.name in ('random_sequence', 'contig', 'supercontig', 'chromosome','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle','kinetoplast')
+    UNION
+    SELECT dr.primary_identifier AS id, ns.source_id AS sequence
+    FROM dots.NaSequence ns, dots.DbRefNaSequence drnf,
+         sres.DbRef dr, sres.ExternalDatabaseRelease edr,
+         sres.ExternalDatabase ed
+    WHERE dr.primary_identifier IS NOT NULL
+      AND ns.na_sequence_id = drnf.na_sequence_id
+      AND drnf.db_ref_id = dr.db_ref_id
+      AND dr.external_database_release_id
+          = edr.external_database_release_id
+      AND edr.external_database_id = ed.external_database_id
+  ) subquery1
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
new file mode 100644
index 0000000000..90bc21c3d6
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql
@@ -0,0 +1,21 @@
+
+  -- Three indexes: by sequence, by id, and by lower(id).
+  CREATE INDEX GenSeqId_sequence_idx ON GenomicSequenceId (sequence, id)
+
+
+  ;
+
+
+
+  CREATE INDEX GenSeqId_id_idx ON GenomicSequenceId (id, sequence)
+
+
+  ;
+
+
+
+  CREATE INDEX GenSeqId_lowid_idx ON GenomicSequenceId (lower(id), sequence)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
new file mode 100644
index 0000000000..be8a73a415
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql
@@ -0,0 +1,14 @@
+:CREATE_AND_POPULATE
+  -- The stored sequence for every row of GenomicSeqAttributes, with project_id derived from the
+  -- scientific name of the sequence's taxon via apidb.project_id().
+  CREATE TABLE GenomicSequenceSequence AS
+  SELECT sa.source_id, cast(apidb.project_id(tn.name) as varchar(20)) as project_id,
+         ns.sequence
+  FROM GenomicSeqAttributes sa, dots.NaSequence ns, sres.TaxonName tn
+  WHERE sa.na_sequence_id = ns.na_sequence_id
+    AND ns.taxon_id = tn.taxon_id
+    AND tn.name_class = 'scientific name'
+
+
+:DECLARE_PARTITION;
+
diff --git
a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
new file mode 100644
index 0000000000..d68fd68292
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql
@@ -0,0 +1,7 @@
+
+  -- Index on (source_id, project_id).
+  create index GenomicSeq_ix on GenomicSequenceSequence (source_id, project_id)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/GoTermSummary.psql b/Model/lib/psql/webtables/MO/GoTermSummary.psql
new file mode 100644
index 0000000000..7757364b49
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GoTermSummary.psql
@@ -0,0 +1,56 @@
+:CREATE_AND_POPULATE
+  -- GO annotations per gene/transcript. Branch 1: terms taken directly from GeneGoTerms. Branch 2: the object
+  -- terms those terms point to in sres.OntologyRelationship (GO_RSRC only). is_go_slim = 1 when in 'goslim_generic'.
+  CREATE TABLE :ORG_ABBREVGoTermSummary AS
+  SELECT ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
+         ggt.taxon_id, ggt.is_not, replace(ggt.go_id, '_', ':') as go_id,
+         ggt.go_term_id, ggt.ontology,
+         replace(ggt.go_term_name, '_',' ') as go_term_name, ggt.source,
+         ggt.evidence_code,
+         CASE ggt.evidence_code WHEN 'IEA' THEN 'Computed' ELSE 'Curated' END as evidence_category,
+         ggt.reference, ggt.evidence_code_parameter,
+         ol.min_depth as depth,
+         case
+           when gs.ontology_term_id is null then 0
+           else 1
+         end as is_go_slim
+  FROM :ORG_ABBREVGeneGoTerms ggt
+    LEFT JOIN :ORG_ABBREVOntologyLevels ol ON ggt.go_term_id = ol.ontology_term_id
+    LEFT JOIN (
+      SELECT distinct ontology_term_id
+      FROM apidb.GoSubset
+      WHERE go_subset_term = 'goslim_generic'
+    ) gs ON ggt.go_term_id = gs.ontology_term_id
+  UNION
+  SELECT ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id,
+         ggt.taxon_id, ggt.is_not,
+         replace (substr(ot.source_id, 1, 25),'_',':') as go_id,
+         ot.ontology_term_id as go_term_id, ggt.ontology,
+         replace (substr(ot.name, 1,250),'_',' ') as go_term_name,
+         ggt.source, ggt.evidence_code,
+         CASE ggt.evidence_code WHEN 'IEA' THEN 'Computed' ELSE 'Curated' END as evidence_category,
+         ggt.reference,
+         ggt.evidence_code_parameter,
+         ol.min_depth as depth,
+         case
+           when gs.ontology_term_id is null then 0
+           else 1
+         end as is_go_slim
+  FROM :ORG_ABBREVGeneGoTerms ggt, sres.OntologyRelationship orel,
+       sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr,
+       sres.OntologyTerm ot
+    LEFT JOIN :ORG_ABBREVOntologyLevels ol ON ot.ontology_term_id = ol.ontology_term_id
+    LEFT JOIN (
+      SELECT distinct ontology_term_id
+      FROM apidb.GoSubset
+      WHERE go_subset_term = 'goslim_generic'
+    ) gs ON ot.ontology_term_id = gs.ontology_term_id
+  WHERE ggt.go_term_id = orel.subject_term_id
+    AND orel.object_term_id = ot.ontology_term_id
+    AND edr.external_database_release_id = ot.external_database_release_id
+    AND edr.external_database_id = ed.external_database_id
+    AND ed.name ='GO_RSRC'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
new file mode 100644
index 0000000000..55b2bbf76d
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql
@@ -0,0 +1,16 @@
+
+  -- Two indexes: one led by aa_sequence_id, one led by ontology and gene_source_id.
+  create index GoTermSum_aaSeqId_idx ON :ORG_ABBREVGoTermSummary (aa_sequence_id, go_id, source)
+
+
+  ;
+
+
+
+  create index GoTermSum_plugin_ix ON :ORG_ABBREVGoTermSummary
+    (ontology, gene_source_id, is_not, is_go_slim,
+     go_id, go_term_name, evidence_code, evidence_category)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/IntronSupportLevel.psql b/Model/lib/psql/webtables/MO/IntronSupportLevel.psql
new file mode 100644
index 0000000000..f9895a117e
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/IntronSupportLevel.psql
@@ -0,0 +1,121 @@
+:CREATE_AND_POPULATE
+  -- Per-gene intron-junction support labels: All-high / Any-high and All-low / Any-low. A gene whose
+  -- qualifying junction count equals its annotated intron count is also emitted under the matching Any-* label.
+  create table :ORG_ABBREVIntronSupportLevel as
+  SELECT * FROM (
+    SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-high') as string_value
+    FROM (
+      SELECT gene_source_id
+           , ontology_term
+           , case when count(*) = intron_count THEN 'All'
+                  when count(*) = 0 THEN 'None' -- cannot fire: a GROUP BY group always has at least one row
+                  else 'Any' end as string_value
+      FROM (
+        SELECT gij.gene_source_id
+             , 'intron_junction' as ontology_term
+             , intronCount.intron_count
+        FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+           , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
+        WHERE gij.gene_source_id = intronCount.source_id
+          and gij.na_sequence_id = stats.na_sequence_id
+          and gij.annotated_intron = 'Yes'
+          AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+          AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+          AND (gij.contained = 0 or gij.percent_max >= 2 /*stats.perc0005_annot_percent_max*/)
+      ) t
+      GROUP BY gene_source_id, ontology_term, intron_count
+    ) t
+    WHERE string_value = 'All'
+
+    UNION
+
+    SELECT gene_source_id
+         , ontology_term
+         , case when count(*) = intron_count THEN 'All-high'
+                when count(*) = 0 THEN 'None'
+                else 'Any-high' end as string_value
+    FROM (
+      SELECT gij.gene_source_id
+           , 'intron_junction' as ontology_term
+           , intronCount.intron_count
+      FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+         , (SELECT count (*) as intron_count, source_id FROM apidbtuning.genemodeldump WHERE type = 'Intron' GROUP BY source_id) intronCount
+      WHERE gij.gene_source_id = intronCount.source_id
+        AND gij.na_sequence_id = stats.na_sequence_id
+        AND gij.annotated_intron = 'Yes'
+        AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+        AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+        AND (gij.contained = 0 or gij.percent_max >= 2)
+    ) t2
+    GROUP BY gene_source_id, ontology_term, intron_count
+
+    UNION
+
+    SELECT gene_source_id
+         , ontology_term
+         , CASE WHEN count(*) = intron_count THEN 'All-low'
+                WHEN count(*) = 0 THEN 'None'
+                ELSE 'Any-low' END as string_value
+    FROM (
+      SELECT gij.gene_source_id
+           , 'intron_junction' as ontology_term
+           , intronCount.intron_count
+      FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+         , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
+      WHERE gij.gene_source_id = intronCount.source_id
+        AND gij.na_sequence_id = stats.na_sequence_id
+        AND gij.annotated_intron = 'Yes'
+        AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4
+        AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END
+        AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max)
+        AND gij.intron_feature_id not in (
+          SELECT gij.intron_feature_id
+          FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats
+          WHERE gij.na_sequence_id = stats.na_sequence_id
+            AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+            AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+            AND (gij.contained = 0 or gij.percent_max >= 2)
+            AND gij.intron_feature_id IS NOT NULL -- one NULL in the list would make NOT IN reject every row
+        )
+    ) t3
+    GROUP BY gene_source_id, ontology_term, intron_count
+
+    UNION
+
+    SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-low') as string_value
+    FROM (
+      SELECT gene_source_id
+           , ontology_term
+           , case when count(*) = intron_count THEN 'All'
+                  when count(*) = 0 THEN 'None'
+                  else 'Any' end as string_value
+      FROM (
+        SELECT gij.gene_source_id
+             , 'intron_junction' as ontology_term
+             , intronCount.intron_count
+        FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats
+           , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount
+        WHERE gij.gene_source_id = intronCount.source_id
+          AND gij.na_sequence_id = stats.na_sequence_id
+          AND gij.annotated_intron = 'Yes'
+          AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4
+          AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END
+          AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max)
+          AND gij.intron_feature_id not in (
+            SELECT gij.intron_feature_id
+            FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats
+            WHERE gij.na_sequence_id = stats.na_sequence_id
+              AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2
+              AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END
+              AND (gij.contained = 0 or gij.percent_max >= 2)
+              AND gij.intron_feature_id IS NOT NULL -- one NULL in the list would make NOT IN reject every row
+          )
+      ) t
+      GROUP BY gene_source_id, ontology_term, intron_count
+    ) t4
+    WHERE string_value = 'All'
+  ) t
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/IntronSupportLevel_ix.psql b/Model/lib/psql/webtables/MO/IntronSupportLevel_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords.psql b/Model/lib/psql/webtables/MO/IntronUtrCoords.psql
new file mode 100644
index 0000000000..ac598bbc08
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/IntronUtrCoords.psql
@@ -0,0 +1,35 @@
+:CREATE_AND_POPULATE
+  -- Per transcript: a JSON-style array of ["type",start,end] triples for its UTR and Intron features, in
+  -- transcript-relative 1-based coordinates (measured from end_max when the transcript is reversed).
+  CREATE TABLE IntronUtrCoords AS
+  SELECT na_feature_id, source_id,
+         '[' || regexp_replace(string_agg(text,',' ORDER BY start_min), '.quot;', '"', 'g') || ']' AS gen_rel_intron_utr_coords -- 'g': replace every match, not just the first
+  FROM (
+    SELECT na_feature_id, source_id, start_min,
+           '["' || feature_type || '",' || start_min || ',' || end_max || ']' AS text
+    FROM (
+      SELECT fl.feature_type, tl.na_feature_id, tl.feature_source_id AS source_id,
+             CASE
+               WHEN tl.is_reversed = 1
+               THEN tl.end_max - fl.end_max + 1
+               ELSE fl.start_min - tl.start_min + 1
+             END AS start_min,
+             CASE
+               WHEN tl.is_reversed = 1
+               THEN tl.end_max - fl.start_min + 1
+               ELSE fl.end_max - tl.start_min + 1
+             END AS end_max
+      FROM
+        apidb.TranscriptLocation tl, apidb.FeatureLocation fl
+      WHERE
+        tl.na_feature_id = fl.parent_id
+        AND fl.feature_type in('UTR', 'Intron')
+        AND tl.is_top_level = 1
+        AND fl.is_top_level = 1
+    ) t1
+  ) t2
+  GROUP BY na_feature_id, source_id
+
+
+:DECLARE_PARTITION;
+
diff --git
a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
new file mode 100644
index 0000000000..0d38e419d9
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql
@@ -0,0 +1,16 @@
+
+  -- Two indexes covering both column orders of (source_id, na_feature_id).
+  CREATE INDEX iuc_srcid_ix
+    ON IntronUtrCoords (source_id, na_feature_id)
+
+
+  ;
+
+
+
+  CREATE INDEX iuc_nfid_ix
+    ON IntronUtrCoords (na_feature_id, source_id)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ.psql
new file mode 100644
index 0000000000..edc7d78f8e
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/NameMappingGIJ.psql
@@ -0,0 +1,125 @@
+  -- JunExpGIJtmp: for each junction protocol_app_node, the 'tpm - unique' expression node sharing its input
+  -- that has the highest average value, plus multiplier = 1,000,000 / total unique reads of that junction node.
+  CREATE UNLOGGED TABLE JunExpGIJtmp AS
+  WITH ij AS (
+    SELECT pj.output_pan_id as junctions_pan_id, p.output_pan_id as expression_pan_id, avg(nafe.value) as avg_value,pan.name as exp_name,
+           regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name
+    FROM panio p, panio pj, results.nafeatureexpression nafe, study.protocolappnode pan
+    WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction)
+      AND pj.input_pan_id = p.input_pan_id
+      AND p.output_pan_id = pan.protocol_app_node_id
+      AND pan.name like '%tpm - unique%'
+      AND p.output_pan_id = nafe.protocol_app_node_id
+    GROUP BY pj.output_pan_id, p.output_pan_id, pan.name
+    ORDER BY pj.output_pan_id
+  ) , stats AS (
+    SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions,
+           sum(unique_reads) as total_reads, round(1000000.0/sum(unique_reads),4) as multiplier -- 1000000.0 forces numeric division; integer / integer would truncate
+    FROM apidb.IntronJunction
+    WHERE unique_reads >= 1
+    GROUP BY protocol_app_node_id
+  ), part AS (
+    SELECT
+      ij.junctions_pan_id, ij.avg_value, stats.multiplier
+      , max(ij.expression_pan_id) OVER w as max_exp_pan_id
+      , max(ij.sample_name) OVER w as max_sample_Name
+      , max(ij.exp_name) OVER w as max_exp_name
+    FROM ij, stats
+    WHERE ij.junctions_pan_id = stats.protocol_app_node_id
+    WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value)
+  )
+  SELECT DISTINCT * FROM (
+    SELECT junctions_pan_id
+         , first_value(max_exp_pan_id) OVER w1 as exp_pan_id
+         , first_value(max_sample_name) OVER w1 as sample_name
+         , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands
+         , multiplier
+    FROM part
+    WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC)
+  ) t
+  ORDER BY junctions_pan_id
+
+  ;
+
+
+
+  create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id)
+
+  ;
+
+  -- MappingStatsGIJtmp: per junction node, averages of read length (minus 2), mapped reads and mapping
+  -- coverage over the input nodes that carry all three characteristics; num_replicates counts those inputs.
+  CREATE UNLOGGED TABLE MappingStatsGIJtmp (
+    junctions_pan_id,
+    read_length,
+    mapped_reads,
+    avg_mapping_coverage,
+    num_replicates
+  ) AS
+  SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length,
+         round(avg(number_mapped_reads),1) as mapped_reads,
+         round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2)
+           as avg_mapping_coverage,
+         count(*) as num_replicates
+  FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length,
+               cb.value::NUMERIC as number_mapped_reads,
+               cc.value::NUMERIC as avg_mapping_coverage
+        FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota,
+             PANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb,
+             STUDY.CHARACTERISTIC cc, sres.ontologyterm otc
+        WHERE je.junctions_pan_id = ioa.output_pan_id
+          AND ioa.input_pan_id = ca.protocol_app_node_id
+          AND ca.value is not null
+          AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID
+          AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length'
+          AND ca.protocol_app_node_id = cb.protocol_app_node_id
+          AND cb.value is not null
+          AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID
+          AND otb.source_id IN ('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads'
+          AND ca.protocol_app_node_id = cc.protocol_app_node_id
+          AND cc.value is not null
+          AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID
+          AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage'
+       ) t
+  GROUP by Junctions_Pan_Id
+
+  ;
+
+
+
+  CREATE INDEX mpstats_pk_ix on MappingStatsGIJtmp
+    (junctions_pan_id,read_length,mapped_reads,avg_mapping_coverage,num_replicates)
+
+
+  ;
+
+:CREATE_AND_POPULATE
+  -- NameMappingGIJ: junction nodes joined to their dataset presenter, its three strand/junction display
+  -- properties and the mapping stats; kept only when showIntronJunctions or includeInUnifiedJunctions is 'true'.
+  CREATE TABLE NameMappingGIJ AS
+  SELECT DISTINCT edp.dataset_presenter_display_name as exp_name,
+         edp.external_database_name, je.sample_name,
+         je.junctions_pan_id, je.exp_pan_id,
+         substr(dp.value, 1, 4000) as presenter_switch_strands,
+         substr(sj.value, 1, 4000) as show_intron_junctions,
+         substr(uj.value, 1, 4000) as include_unified_junctions,
+         ms.read_length, ms.mapped_reads, ms.avg_mapping_coverage, ms.num_replicates,
+         je.switch_strands, je.multiplier
+  FROM junexpgijtmp je, study.nodeNodeSet sl, study.NodeSet s, ExternalDbDatasetPresenter edp,
+       DatasetProperty dp, DatasetProperty sj, DatasetProperty uj, mappingstatsgijtmp ms
+  WHERE sl.protocol_app_node_id = je.junctions_pan_id
+    AND je.junctions_pan_id = ms.junctions_pan_id
+    AND s.node_set_id = sl.node_set_id
+    AND s.node_type like 'junctions'
+    AND s.external_database_release_id = edp.external_database_release_id
+    AND dp.dataset_presenter_id = edp.dataset_presenter_id
+    AND dp.property = 'switchStrandsProfiles'
+    AND sj.dataset_presenter_id = edp.dataset_presenter_id
+    AND sj.property = 'showIntronJunctions'
+    AND uj.dataset_presenter_id = edp.dataset_presenter_id
+    AND uj.property = 'includeInUnifiedJunctions'
+    AND (substr(sj.value, 1, 10) = 'true' or substr(uj.value, 1, 10) = 'true')
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
new file mode 100644
index 0000000000..025dd1f8b3
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql
@@ -0,0 +1,6 @@
+
+  -- Index on (junctions_pan_id, exp_pan_id).
+  create index namemappinggij_ix on NameMappingGIJ (junctions_pan_id,exp_pan_id)
+
+  ;
+
diff --git
a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
new file mode 100644
index 0000000000..56714b16e6
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql
@@ -0,0 +1,15 @@
+:CREATE_AND_POPULATE
+  -- Maps each organism's scientific name to its name_for_filenames abbreviation.
+
+  CREATE TABLE OrganismAbbreviation AS
+  SELECT organism, abbreviation
+  FROM (
+    select tn.name as organism, o.name_for_filenames as abbreviation
+    from apidb.Organism o, sres.TaxonName tn
+    where o.taxon_id = tn.taxon_id
+      and tn.name_class = 'scientific name'
+  ) subquery1
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
new file mode 100644
index 0000000000..f04888b75c
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql
@@ -0,0 +1,42 @@
+:CREATE_AND_POPULATE
+  -- Organism / parent / abbreviation rows from three sources: OrganismAbbreviationWS, one
+  -- 'Popset/Genbank Isolates' row per family, and species with ESTs plus a fixed Cryptosporidiidae reference row.
+  CREATE TABLE OrganismAbbreviationBlast as
+  SELECT organism, parent, abbreviation, substr(project_id, 1, 20) as project_id
+  FROM OrganismAbbreviationWS
+  UNION
+  -- all families for popsets
+  SELECT DISTINCT family_name_for_files || ' Popset/Genbank Isolates' as organism, '' as parent,
+         family_name_for_files as abbreviation, substr(project_name, 1, 20) as project_id
+  FROM apidb.Organism
+  WHERE family_name_for_files is not null
+    AND abbrev || '_isolates_genbank_RSRC' IN (SELECT external_db_name as db_name FROM PopsetAttributes)
+    AND family_name_for_files NOT IN ('Culicosporidae', 'Dubosqiidae', 'Ordosporidae')
+  UNION
+  SELECT special.organism, special.parent, special.abbreviation,
+         substr(ot.project_id, 1, 20) as project_id
+  FROM OrganismTree ot,
+       ( -- all species and speciesAbbreviations from apidb.Organism where we have ests
+         SELECT DISTINCT
+                sp.name as organism, ot.parentTerm as parent,
+                regexp_replace(org.name_for_filenames, replace(org.strain_abbrev, '/','_'),'') as abbreviation
+         FROM sres.TaxonName sp, TaxonSpecies ts, apidb.Organism org, OrganismTree ot
+         WHERE org.taxon_id = ts.taxon_id
+           AND ts.species_taxon_id = sp.taxon_id
+           AND sp.name_class = 'scientific name'
+           AND ot.term = sp.name
+           AND org.strain_abbrev is not null
+           AND org.name_for_filenames is not null
+           AND sp.taxon_id
+               in (SELECT etn.taxon_id
+                   FROM sres.TaxonName etn
+                   WHERE etn.name in (SELECT organism FROM EstAttributes))
+         UNION
+         SELECT 'Cryptosporidiidae SSU_18srRNA Reference Isolates' as organism,
+                'Cryptosporidium' as parent, 'CryptosporidiidaeReference' as abbreviation
+       ) special
+  WHERE special.parent = ot.term
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast_ix.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation_ix.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviation_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
new file mode 100644
index 0000000000..5414446561
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes.psql
@@ -0,0 +1,327 @@
+  -- DataSourceCount: one row per taxon with a 1 in each *_has column whose data type is present
+  -- (NULL otherwise); pivoted from the (taxon_id, stype) pairs built below.
+  CREATE UNLOGGED TABLE DataSourceCount AS
+  SELECT
+    taxon_id,
+    max(CASE WHEN stype = 'organellar' THEN num ELSE null END) as organellar_has,
+    max(CASE WHEN stype = 'Epitope' THEN num ELSE null END) as Epitope_has,
+    max(CASE WHEN stype = 'Array' THEN num ELSE null END) as Array_has,
+    max(CASE WHEN stype = 'HTSIsolate' THEN num ELSE null END) as HTSIsolate_has,
+    max(CASE WHEN stype = 'Popset' THEN num ELSE null END) as Popset_has
+  FROM (
+    SELECT DISTINCT enas.taxon_id, 'organellar' AS stype, 1 AS num
+    FROM dots.externalNAsequence enas, SRES.ontologyterm ot
+    WHERE enas.sequence_ontology_id = ot.ontology_term_id
+      AND ot.name in( 'mitochondrial_chromosome','apicoplast_chromosome')
+    GROUP BY enas.taxon_id
+    UNION
+    SELECT distinct ds.taxon_id, 'HTSIsolate' AS stype, 1 AS num
+    FROM apidb.DataSource ds
+    WHERE ds.type = 'isolates' AND ds.subtype = 'HTS_SNP'
+    GROUP BY taxon_id
+    UNION
+    SELECT distinct ds.taxon_id, 'Popset' AS stype, 1 AS num
+    FROM apidb.DataSource ds
+    WHERE ds.subtype = 'sequenceing_types' -- NOTE(review): spelling looks odd; confirm against the values stored in apidb.DataSource.subtype
+    GROUP BY taxon_id
+    UNION
+    SELECT distinct ds.taxon_id, 'Epitope' AS stype, 1 AS num
+    FROM apidb.DataSource ds
+    WHERE ds.type = 'epitope'
+    GROUP BY taxon_id
+    UNION
+    SELECT distinct ds.taxon_id, 'Array' AS stype, 1 AS num
+    FROM apidb.DataSource ds
+    WHERE ds.type = 'transcript_expression'
+      AND ds.subtype = 'array'
+    GROUP BY taxon_id
+  ) t
+  GROUP BY taxon_id
+
+  ;
+
+
+  -- OrganismCentromere: taxa that have at least one 'centromere' feature (hasCentromere = 1).
+  CREATE UNLOGGED TABLE OrganismCentromere AS
+  SELECT distinct s.taxon_id,
+         case when count(*) > 0 then 1 else 0 end as hasCentromere
+  FROM DOTS.MISCELLANEOUS f
+     , sres.ontologyTerm ot
+     , dots.nasequence s
+  WHERE ot.ontology_term_id = f.sequence_ontology_id
+    AND ot.name='centromere'
+    AND f.na_sequence_id = s.na_sequence_id
+  GROUP BY s.taxon_id
+
+  ;
+
+
+  -- SequenceCount: number of top-level contigs, supercontigs and chromosomes per taxon.
+  CREATE UNLOGGED TABLE SequenceCount AS
+  SELECT
+    taxon_id,
+    max(CASE WHEN sequence_type = 'contig' THEN num ELSE null END) as contig_num,
+    max(CASE WHEN sequence_type = 'supercontig' THEN num ELSE null END) as supercont_num,
+    max(CASE WHEN sequence_type = 'chromosome' THEN num ELSE null END) as chrom_num
+  FROM (
+    SELECT count(*) as num, sequence_type, taxon_id
+    FROM GenomicSeqAttributes
+    WHERE is_top_level =1
+    GROUP BY taxon_id, sequence_type
+  ) t
+  GROUP BY taxon_id
+
+  ;
+
+
+  -- CommunityCount: currently a placeholder that reports 0 for every taxon in GeneAttributes.
+  CREATE UNLOGGED TABLE CommunityCount AS
+  -- SELECT taxon_id, count(*) as communityCount
+  -- TODO: add this back
+  select taxon_id, 0 as communityCount
+  FROM GeneAttributes
+  --WHERE
+  --(source_id, project_id) IN (
+  --SELECT distinct stable_id, project_name
+  --userlogins5.mappedComment@prodn.login_comment
+  --FROM userlogins5.mappedComment
+  --WHERE is_visible = 1
+  --AND comment_target_id = 'gene'
+  --)
+  GROUP BY taxon_id
+
+  ;
+
+  -- ProfileCount: per taxon, distinct non-deprecated genes with rt_pcr / rnaseq / array expression profiles.
+  -- ELSE NULL (not '') is required: PostgreSQL counts an empty string as a value, which added 1 to each count.
+  CREATE UNLOGGED TABLE ProfileCount AS
+  SELECT ga.taxon_id,
+         count(distinct(case when p.dataset_type = 'transcript_expression'
+                              and p.dataset_subtype like '%rt_pcr%'
+                              and ga.is_deprecated = 0
+                             then p.source_id
+                             else NULL
+                        end)) as rtPCRCount,
+         count(distinct(case when p.dataset_type = 'transcript_expression'
+                              and p.dataset_subtype = 'rnaseq'
+                              and ga.is_deprecated = 0
+                             then p.source_id
+                             else NULL
+                        end)) as rnaSeqCount,
+         count(distinct(case when p.dataset_type = 'transcript_expression'
+                              and p.dataset_subtype = 'array'
+                              and ga.is_deprecated = 0
+                             then p.source_id
+                             else NULL
+                        end)) as geneArrayCount
+  FROM Profile p
+    RIGHT OUTER JOIN GeneAttributes ga ON ga.source_id = p.source_id
+  GROUP BY ga.taxon_id
+
+  ;
+
+  -- PopsetCount: per taxon of the hit sequence, distinct popset isolates whose similarity hit
+  -- (pvalue_exp <= -10) overlaps a gene on that sequence.
+  CREATE UNLOGGED TABLE PopsetCount AS
+  SELECT count(distinct gene.source_id) as popsetCount, sim.taxon_id
+  FROM (
+    (SELECT i.source_id, nas.taxon_id, nas.source_id as sequence_source_id
+     FROM dots.similarity s, PopsetAttributes i,
+          core.tableinfo t, dots.nasequence nas
+     WHERE s.query_id = i.na_sequence_id
+       AND nas.na_sequence_id = s.subject_id
+       AND t.table_id = s.subject_table_id
+       AND t.table_id = s.query_table_id
+       AND t.name = 'ExternalNASequence'
+       AND s.pvalue_exp <= -10
+    ) sim LEFT JOIN
+    (SELECT i.source_id, seq.source_id as sequence_id
+     FROM dots.similarity s, PopsetAttributes i, GeneAttributes g,
+          core.tableinfo t, dots.nasequence seq
+     WHERE s.query_id = i.na_sequence_id
+       AND s.subject_id = g.na_sequence_id
+       AND t.table_id = s.subject_table_id
+       AND t.table_id = s.query_table_id
+       AND s.min_subject_start <= g.end_max
+       AND s.max_subject_end >= g.start_min
+       AND g.na_sequence_id = seq.na_sequence_id
+       AND t.name = 'ExternalNASequence'
+    ) gene
+    ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id)
+  GROUP BY sim.taxon_id
+
+  ;
+
+  -- GeneCount: per-genome statistics (one row per genomestat group) with distinct-gene counts per data type.
+  -- Every conditional count falls through to NULL so that non-matching rows are not counted as a value.
+  CREATE UNLOGGED TABLE GeneCount AS
+  SELECT genomestat.taxon_id,
+         genomestat.project_id,
+         genomestat.database_version,
+         genomestat.ncbi_tax_id,
+         genomestat.Megabps,
+         coalesce(snpCount.ct,0) as snpCount,
+         coalesce(count(distinct ga.source_id),0) as geneCount,
+         coalesce(count(distinct case when ga.is_pseudo =1 then ga.source_id else NULL end),0) as pseudoGeneCount,
+         coalesce(count(distinct case when (ga.gene_type ='protein coding' or ga.gene_type ='protein coding gene') then ga.source_id else NULL end),0) as codingGeneCount,
+         coalesce(count(distinct case when (ga.gene_type ='protein coding' or ga.gene_type ='protein coding gene') then NULL else ga.source_id end),0) as otherGeneCount,
+         coalesce(count (distinct (case when ga.is_deprecated = 0
+                                        then cct.gene_source_id
+                                        else NULL
+                                   end)),0) ChipChipGeneCount ,
+         coalesce(count (distinct (case when ga.is_deprecated = 0
+                                        then pp.source_id
+                                        else NULL
+                                   end)),0) orthologCount,
+         coalesce(count (distinct (case when ga.is_deprecated = 0
+                                        then gts.gene_source_id
+                                        else NULL
+                                   end)),0) goCount,
+         coalesce(count (distinct (case when ga.is_deprecated = 0
+                                        then tfbs.gene_source_id
+                                        else NULL
+                                   end)),0) tfbsCount,
+         coalesce(count (distinct (case when ga.is_deprecated = 0
+                                        then mss.aa_sequence_id
+                                        else NULL
+                                   end)),0) proteomicsCount,
+         coalesce(count (distinct (case when ga.is_deprecated = 0
+                                        then est.source_id
+                                        else NULL
+                                   end)),0) estCount,
+         coalesce(count (distinct (case when (ga.is_deprecated = 0 and ta.ec_numbers is not null)
+                                        then ga.source_id
+                                        else NULL
+                                   end)),0) ecNumberCount
+  FROM GeneAttributes ga
+    LEFT OUTER JOIN apidb.phylogeneticprofile pp on ga.source_id = pp.source_id
+    LEFT OUTER JOIN gotermsummary gts on ga.source_id = gts.gene_source_id
+    LEFT OUTER JOIN TFBSGene tfbs on ga.source_id = tfbs.gene_source_id
+    LEFT OUTER JOIN TranscriptAttributes ta on ta.gene_source_id = ga.source_id
+    LEFT OUTER JOIN apidb.MassSpecSummary mss on ta.aa_sequence_id = mss.aa_sequence_id
+    LEFT OUTER JOIN chipchipTranscript cct on ga.source_id = cct.gene_source_id
+    LEFT OUTER JOIN (
+      SELECT distinct s.gene as source_id
+      FROM EstAlignmentGeneSummary s, EstAttributes e
+      WHERE s.est_gene_overlap_length >= 100
+        AND s.is_best_alignment in (1)
+        AND s.percent_est_bases_aligned >= 20
+        AND s.percent_identity >= 90
+        AND e.best_alignment_count <= 1
+        AND e.source_id = s.accession
+      GROUP by s.gene HAVING count(*) >= 1
+    ) est ON ga.source_id = est.source_id
+    RIGHT OUTER JOIN (
+      SELECT project_id, taxon_id,
+             max(database_version) as database_version,
+             CASE WHEN ncbi_tax_id > 9000000000 THEN NULL
+                  ELSE ncbi_tax_id
+             END ncbi_tax_id,
+             to_char(sum(length)/1000000.0,'9999.99') as megabps -- 1000000.0 keeps the fractional megabases if length is an integer type
+      FROM GenomicSeqAttributes
+      WHERE is_top_level = 1
+      GROUP BY project_ID, taxon_id, ncbi_tax_id
+    ) genomestat ON genomestat.taxon_id = ga.taxon_id
+    LEFT OUTER JOIN (
+      SELECT count(distinct ga.source_id) as ct, ga.taxon_id
+      FROM GeneAttributes ga, SnpAttributes sf
+      WHERE sf.gene_source_id = ga.source_id
+        AND ga.is_deprecated = 0
+      GROUP BY ga.taxon_id
+    ) snpCount ON ga.taxon_id = snpCount.taxon_id
+  GROUP BY genomestat.taxon_id,
+           genomestat.project_id,
+           genomestat.database_version,
+           genomestat.ncbi_tax_id,
+           genomestat.Megabps,
+           snpCount.ct
+
+  ;
+
+:CREATE_AND_POPULATE
+  -- OrganismAttributes: one row per apidb.Organism with the counts gathered in the temp tables above, plus
+  -- the species name / NCBI id via TaxonSpecies. strain is the organism name with the species name removed.
+  CREATE TABLE OrganismAttributes AS
+  SELECT oa.*, tn2.name as species, t.ncbi_tax_id as species_ncbi_tax_id
+       , CASE WHEN ltrim(replace(oa.organism_name, tn2.name, ''))= oa.organism_name
+              THEN strain_abbrev
+              ELSE ltrim(replace(oa.organism_name, tn2.name, '')) END AS strain
+  FROM (
+    SELECT o.project_name as project_id,
+           case when t.ncbi_tax_id > 10000000
+                -- then 'TMPTX_' || round(t.ncbi_tax_id / 10000000) || '_' ||
+                -- mod(t.ncbi_tax_id, 10000000) -- e.g. "TMPTX_930_1"
+                -- then 'TMPTX_' || t.ncbi_tax_id -- all the many digits
+                then 'TMPTX_' || o.public_abbrev
+                else 'NCBITAXON_' || t.ncbi_tax_id
+           end as source_id,
+           o.abbrev as internal_abbrev,
+           o.public_abbrev,
+           o.orthomcl_abbrev,
+           o.family_name_for_files,
+           tn.name as organism_name,
+           o.genome_source,
+           o.strain_abbrev,
+           o.is_annotated_genome,
+           o.is_reference_strain,
+           o.is_family_representative,
+           o.name_for_filenames,
+           o.taxon_id as component_taxon_id,
+           gc.database_version,
+           gc.megabps as megabps,
+           gc.ncbi_tax_id as ncbi_tax_id,
+           gc.snpCount as snpCount,
+           gc.geneCount as geneCount,
+           gc.pseudoGeneCount as pseudoGeneCount,
+           gc.codingGeneCount as codingGeneCount,
+           gc.otherGeneCount as otherGeneCount,
+           gc.ChipChipGeneCount as ChipChipGeneCount,
+           gc.orthologCount as orthologCount,
+           gc.goCount as goCount,
+           gc.tfbsCount as tfbsCount,
+           gc.proteomicsCount as proteomicsCount,
+           gc.estCount as estCount,
+           gc.ecNumberCount as ecNumberCount,
+           cast(coalesce(dsc.Organellar_Has, 0) as NUMERIC(1)) as isOrganellar,
+           cast(coalesce(dsc.HTSIsolate_Has, 0) as NUMERIC(1)) as hasHTSIsolate,
+           cast(coalesce(dsc.Popset_Has, 0) as NUMERIC(1)) as hasPopset,
+           cast(coalesce(dsc.Epitope_Has, 0) as NUMERIC(1)) as hasEpitope,
+           cast(coalesce(dsc.Array_Has, 0) as NUMERIC(1)) as hasArray,
+           coalesce(oc.hasCentromere, 0) as hasCentromere,
+           coalesce(sc.contig_num, 0) as contigCount,
+           coalesce(sc.supercont_num, 0) as supercontigCount,
+           coalesce(sc.chrom_num, 0) as chromosomeCount,
+           coalesce(cc.communityCount, 0) as communityCount,
+           coalesce(psc.popsetCount, 0) as popsetCount,
+           coalesce(pc.geneArrayCount, 0) as arrayGeneCount,
+           coalesce(pc.rnaSeqCount, 0) as rnaSeqCount,
+           coalesce(pc.rtPCRCount, 0) as rtPCRCount,
+           coalesce(ta.avg_transcript_length, 0) as avg_transcript_length
+    FROM apidb.Organism o
+      INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id
+      INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id
+      LEFT JOIN DataSourceCount dsc ON o.taxon_id = dsc.taxon_id
+      LEFT JOIN OrganismCentromere oc ON o.taxon_id = oc.taxon_id
+      LEFT JOIN SequenceCount sc ON o.taxon_id = sc.taxon_id
+      LEFT JOIN CommunityCount cc ON o.taxon_id = cc.taxon_id
+      LEFT JOIN GeneCount gc ON o.taxon_id = gc.taxon_id
+      LEFT JOIN popsetCount psc ON o.taxon_id = psc.taxon_id
+      LEFT JOIN profileCount pc ON o.taxon_id = pc.taxon_id
+      LEFT JOIN (
+        SELECT taxon_id, round(avg(length),1) as avg_transcript_length
+        FROM TranscriptAttributes
+        GROUP by taxon_id
+      ) ta ON o.taxon_id = ta.taxon_id
+    WHERE tn.name_class = 'scientific name'
+  ) oa,
+  TaxonSpecies ts,
+  sres.taxon t,
+  sres.taxonname tn2
+  WHERE oa.component_taxon_id = ts.taxon_id
+    AND ts.species_taxon_id = t.taxon_id
+    AND ts.species_taxon_id = tn2.taxon_id
+    AND tn2.name_class = 'scientific name'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
new file mode 100644
index 0000000000..b40126a71e
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql
@@ -0,0 +1,7 @@
+
+-- Enforces one OrganismAttributes row per source_id.
+create unique index Organism_sourceId_idx ON OrganismAttributes (source_id)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
new file mode 100644
index 0000000000..326c177a8a
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql
@@ -0,0 +1,49 @@
+:CREATE_AND_POPULATE
+
+
+  CREATE TABLE OrganismSelectTaxonRank AS
+  WITH organism_rank AS (
+    SELECT tn1.name as organism, o.public_abbrev, tn2.name as parent_organism,
+           case when tn2.name = 'Oomycetes' then 'class' else r.rank end as rank
+    FROM (
+      WITH RECURSIVE cte AS(
+        SELECT taxon_id input, taxon_id, rank, parent_id
+        FROM sres.taxon
+        WHERE taxon_id IN (SELECT taxon_id FROM apidb.organism WHERE is_annotated_genome = 1)
+        UNION
+        SELECT cte.input, t.taxon_id, t.rank, t.parent_id
+        FROM sres.taxon t, cte
+        WHERE cte.parent_id = t.taxon_id
+ ) + SELECT input, taxon_id, rank + FROM cte + ) r + , sres.taxonname tn1 + , sres.taxonname tn2 + , apidb.organism o + WHERE r.input = tn1.taxon_id + AND r.taxon_id = tn2.taxon_id + AND tn1.name_class = 'scientific name' + AND tn2.name_class = 'scientific name' + AND (r.rank in ('phylum', 'genus', 'species', 'kingdom', 'class') or (r.rank = 'no rank' and tn2.name = 'Oomycetes')) + AND tn1.taxon_id = o.taxon_id + ) + SELECT + organisms.organism + , organisms.public_abbrev + , coalesce(phylum.parent_organism, 'N/A') as phylum + , coalesce(genus.parent_organism, 'N/A') as genus + , coalesce(species.parent_organism, 'N/A') as species + , coalesce(kingdom.parent_organism, 'N/A') as kingdom + , coalesce(class.parent_organism, 'N/A') as class + FROM + (select distinct organism, public_abbrev from organism_rank) organisms + LEFT JOIN ( select * from organism_rank where rank= 'phylum') phylum ON organisms.organism = phylum.organism + LEFT JOIN ( select * from organism_rank where rank= 'genus') genus ON organisms.organism = genus.organism + LEFT JOIN ( select * from organism_rank where rank= 'species') species ON organisms.organism = species.organism + LEFT JOIN ( select * from organism_rank where rank= 'kingdom') kingdom ON organisms.organism = kingdom.organism + LEFT JOIN ( select * from organism_rank where rank= 'class') class ON organisms.organism = class.organism + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank_ix.psql b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank_ix.psql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Model/lib/psql/webtables/MO/PathwayNodeGene.psql b/Model/lib/psql/webtables/MO/PathwayNodeGene.psql new file mode 100644 index 0000000000..f12202677e --- /dev/null +++ b/Model/lib/psql/webtables/MO/PathwayNodeGene.psql @@ -0,0 +1,14 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE PathwayNodeGene as + SELECT DISTINCT pn.pathway_node_id + , tp.gene_source_id + FROM transcriptpathway tp + , 
sres.pathwaynode pn
+    WHERE tp.pathway_id = pn.pathway_id
+      AND tp.ec_number_gene like replace(pn.display_label, '-', '%')
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/PathwayNodeGene_ix.psql b/Model/lib/psql/webtables/MO/PathwayNodeGene_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable.psql b/Model/lib/psql/webtables/MO/PathwaysGeneTable.psql
new file mode 100644
index 0000000000..e792016369
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/PathwaysGeneTable.psql
@@ -0,0 +1,44 @@
+:CREATE_AND_POPULATE
+
+
+    CREATE TABLE PathwaysGeneTable AS
+    WITH gene_reaction AS (
+        -- one row per gene / pathway / reaction / enzyme; exact_match is 'Yes' or 'No'
+        -- for a group maximum of 1 or 0 (NULL for any other value)
+        SELECT tp.gene_source_id
+             , tp.project_id
+             , tp.pathway_source_id
+             , tp.pathway_name
+             , pr.reaction_source_id
+             , pr.enzyme
+             , pr.expasy_url
+             , tp.pathway_source
+             , CASE max(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match
+        FROM TranscriptPathway tp
+        INNER JOIN PathwayAttributes pa ON pa.pathway_id = tp.pathway_id
+        INNER JOIN PathwayCompounds pc ON pc.pathway_id = pa.pathway_id
+        INNER JOIN PathwayReactions pr
+                ON pr.reaction_id = pc.reaction_id
+               AND pr.ext_db_name = pc.ext_db_name
+               AND pr.enzyme = tp.ec_number_pathway
+        WHERE tp.wildcard_count_gene <= tp.wildcard_count_pathway
+          AND pr.enzyme != '-.-.-.-'
+        GROUP BY tp.gene_source_id, tp.project_id, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source
+    )
+    -- collapse the reactions of each gene / pathway / enzyme row into a count
+    SELECT gene_source_id
+         , project_id
+         , pathway_source_id
+         , pathway_name
+         , count(reaction_source_id) AS reactions
+         , enzyme
+         , expasy_url
+         , pathway_source
+         , exact_match
+    FROM gene_reaction
+    GROUP BY gene_source_id, project_id, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match
+    ORDER BY pathway_source, lower(pathway_name)
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
new file mode 100644
index 0000000000..776a8f5ba5
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql
@@ -0,0 +1,9 @@
+
+
+  create index pgt_ix on PathwaysGeneTable
+    (gene_source_id, project_id, pathway_source_id, pathway_name,
+     reactions, enzyme, expasy_url, pathway_source, exact_match)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity.psql b/Model/lib/psql/webtables/MO/PdbSimilarity.psql
new file mode 100644
index 0000000000..2818d01271
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/PdbSimilarity.psql
@@ -0,0 +1,32 @@
+:CREATE_AND_POPULATE
+
+
+    CREATE TABLE PdbSimilarity AS
+    SELECT ta.source_id, eas.source_id AS pdb_chain,
+           substr(eas.description, 1, 100) AS pdb_title,
+           -- pdb_id: the chain id up to (not including) its last '_'; empty when the id has no '_'
+           substr(eas.source_id
+                 , 1
+                 , length(eas.source_id) - (
+                     CASE strpos(reverse(eas.source_id), '_')
+                       WHEN 0 THEN length(eas.source_id)
+                       ELSE strpos(reverse(eas.source_id), '_') END
+                   )
+                 ) AS pdb_id,
+           s.evalue_mant, s.evalue_exp,
+           s.pident AS percent_identity,
+           -- numeric cast prevents integer-division truncation; NULLIF yields NULL instead of a divide-by-zero error
+           ROUND((s.length::numeric / NULLIF(ta.protein_length, 0)) * 100) AS percent_plasmo_coverage,
+           SUBSTR(tn.name, 1, 100) AS taxon,
+           eas.taxon_id AS pdb_taxon_id, ta.taxon_id AS gene_taxon_id
+    FROM apidb.PdbSimilarity s
+    INNER JOIN TranscriptAttributes ta ON ta.aa_sequence_id = s.aa_sequence_id
+    -- NOTE(review): this joins percent identity (s.pident) to a primary key, which looks like a
+    -- wrong column; the intended apidb.PdbSimilarity key is not visible here -- confirm and fix
+    INNER JOIN apiDB.ProteinDataBank eas ON s.pident = eas.protein_data_bank_id
+    INNER JOIN sres.TaxonName tn ON tn.taxon_id = eas.taxon_id AND tn.name_class = 'scientific name'
+    ORDER BY ta.source_id, eas.source_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity_ix.psql b/Model/lib/psql/webtables/MO/PdbSimilarity_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/Profile.psql b/Model/lib/psql/webtables/MO/Profile.psql
new file mode 100644
index 0000000000..7db9b02f91
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/Profile.psql
@@ -0,0 +1,800 @@
+:CREATE_AND_POPULATE
+
+
+  CREATE TABLE Profile (
+    DATASET_NAME                  VARCHAR(200),
+    DATASET_TYPE                  VARCHAR(50),
+    DATASET_SUBTYPE
VARCHAR(50), + PROFILE_TYPE VARCHAR(30), + NODE_TYPE VARCHAR(100), + SOURCE_ID VARCHAR(500), + PROFILE_STUDY_ID NUMERIC(7), + PROFILE_SET_NAME VARCHAR(400), + PROFILE_SET_SUFFIX VARCHAR(50), + PROFILE_AS_STRING VARCHAR(4000), + MAX_VALUE NUMERIC, + MIN_VALUE NUMERIC, + MAX_TIMEPOINT VARCHAR(200), + MIN_TIMEPOINT VARCHAR(200) + ) + + +:DECLARE_PARTITION; + + + + DO $$ + DECLARE + ctrows NUMERIC := 0; + commit_after NUMERIC := 10000; + pf_rows record; + BEGIN + FOR pf_rows IN ( + SELECT + ds.name as dataset_name, ds.type as dataset_type, + ds.subtype as dataset_subtype, profile.profile_type, profile.node_type, + profile.source_id, profile.node_set_id as profile_study_id, + ps.name as profile_set_name, + cast(case + when regexp_replace(ps.name, '\[.+\]', '') like '% - %' + then regexp_replace(regexp_replace(ps.name, ' *\[.+\]', ''), '.+ - ', '') + -- special cases for legacy datasets + when regexp_replace(ps.name, '\[.+\]', '') like 'DeRisi%' + then regexp_replace(regexp_replace(ps.name, '\[.+\]', ''), 'DeRisi ', '') + when regexp_replace(ps.name, '\[.+\]', '') like 'winzeler_cc_%' + then regexp_replace(regexp_replace(ps.name, '\[.+\]', ''), 'winzeler_cc_', '') + when regexp_replace(ps.name, '\[.+\]', '') like 'Llinas RT transcription and decay %' + then regexp_replace(regexp_replace(ps.name, '\[.+\]', ''), 'Llinas RT transcription and decay ', '') + when regexp_replace(ps.name, '\[.+\]', '') like 'T.brucei paired end RNA-Seqdata from Horn%' + then regexp_replace( + regexp_replace( + regexp_replace( + regexp_replace( + regexp_replace(ps.name, '\[.+ nonunique\]', ''), + '\[.+ unique\]', ' - unique'), + '\[.+\]', ''), + 'aligned with cds coordinates ', 'cds coordinates'), + 'T.brucei paired end RNA-Seqdata from Horn ', '') + else null + end as varchar(50) + ) as profile_set_suffix, + CASE WHEN replace(profile.profile_as_string, 'NA' || CHR(9), '') = 'NA' THEN null ELSE profile.profile_as_string END as profile_as_string, + profile.max_value, profile.min_value, + 
profile.max_timepoint, profile.min_timepoint + FROM apidb.DataSource ds, sres.ExternalDatabase d, + sres.ExternalDatabaseRelease r, study.NodeSet ps, + ( + -- gene profiles + SELECT gene_profile.node_set_id, ga.source_id, gene_profile.profile_type, gene_profile.node_type, + gene_profile.profile_as_string, gene_profile.max_value, gene_profile.min_value, + gene_profile.max_timepoint, gene_profile.min_timepoint + FROM GeneAttributes ga, + ( + SELECT * FROM ( + WITH result AS ( + SELECT na_feature_id, protocol_app_node_id, max(value) as value + FROM results.NAFEATUREHOSTRESPONSE + GROUP BY na_feature_id, protocol_app_node_id + ), min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.value) over w1 as max_value, + first_value(result.value) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + from study.protocolappnode pan, study.NodeNodeSet sl, study.NodeSet s, result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.value ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type, + string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, min_max, result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, 
min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t1 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.mean_phenotype) over w1 as max_value, + first_value(result.mean_phenotype) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, apidb.CrisprPhenotype result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.mean_phenotype ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type, + string_agg(coalesce(round(result.mean_phenotype::numeric, 2)::varchar,'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.CrisprPhenotype result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND result.na_feature_id = min_max.na_feature_id + AND sl.node_set_id = min_max.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t2 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.score) over w1 as max_value, + first_value(result.score) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM 
study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeScore result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.score ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type, + string_agg(coalesce(round(result.score::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeScore result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t3 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.relative_growth_rate) over w1 as max_value, + first_value(result.relative_growth_rate) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeGrowthRate result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.relative_growth_rate ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED 
FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type, + string_agg(coalesce(round(result.relative_growth_rate::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.PhenotypeGrowthRate result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t4 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.value) over w1 as max_value, + first_value(result.value) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.value ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type, + string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, 
min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t5 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.percentile_channel1) over w1 as max_value, + first_value(result.percentile_channel1) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.percentile_channel1 ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'channel1_percentiles' as profile_type, s.node_type, + string_agg(coalesce(round(result.percentile_channel1::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, 
min_max.min_timepoint, s.node_type + ) t6 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.percentile_channel2) over w1 as max_value, + first_value(result.percentile_channel2) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.percentile_channel2 ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'channel2_percentiles' as profile_type, s.node_type, + string_agg(coalesce(round(result.percentile_channel2::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t7 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.standard_error) over w1 as max_value, + first_value(result.standard_error) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM 
study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.standard_error ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'standard_error' as profile_type, s.node_type, + string_agg(coalesce(round(result.standard_error::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, results.NaFeatureExpression result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t8 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.pvalue) over w1 as max_value, + first_value(result.pvalue) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.pvalue ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED 
FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'pvalue' as profile_type, s.node_type, + string_agg(coalesce(round(result.pvalue::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t9 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.amplitude) over w1 as max_value, + first_value(result.amplitude) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.PROTOCOLAPPNODE pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.amplitude ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'amplitude' as profile_type, s.node_type, + string_agg(coalesce(round(result.amplitude::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result, 
min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t10 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.period) over w1 as max_value, + first_value(result.period) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.period ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'period' as profile_type, s.node_type, + string_agg(coalesce(round(result.period::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.NaFeatureMetacycle result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t11 + UNION ALL + SELECT * FROM 
( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.probability_mean) over w1 as max_value, + first_value(result.probability_mean) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.probability_mean ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'probability_mean' as profile_type, s.node_type, + string_agg(coalesce(round(result.probability_mean::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t12 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.sd) over w1 as max_value, + first_value(result.sd) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result + WHERE result.protocol_app_node_id 
= sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.sd ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'sd' as profile_type, s.node_type, + string_agg(coalesce(round(result.sd::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t13 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.lower_CI) over w1 as max_value, + first_value(result.lower_CI) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.lower_CI ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'lower_CI' as profile_type, s.node_type, + string_agg(coalesce(round(result.lower_CI::numeric, 2)::varchar, 'NA'), chr(9) 
order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t14 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.na_feature_id, + last_value(result.upper_CI) over w1 as max_value, + first_value(result.upper_CI) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.na_feature_id + ORDER BY result.upper_CI ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.na_feature_id, 'upper_CI' as profile_type, s.node_type, + string_agg(coalesce(round(result.upper_CI::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.LopitResults result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.na_feature_id = 
result.na_feature_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t15 +-- TO FIX +-- UNION ALL +-- SELECT * FROM ( +-- WITH min_max AS ( +-- SELECT DISTINCT sl.node_set_id, result.na_feature_id, +-- null as max_value, +-- null as min_value, +-- last_value(pan.name) over w1 as max_timepoint, +-- first_value(pan.name) over w1 as min_timepoint +-- FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.HaplotypeResult result +-- WHERE result.protocol_app_node_id = sl.protocol_app_node_id +-- AND result.protocol_app_node_id = pan.protocol_app_node_id +-- AND sl.node_set_id = s.node_set_id +-- WINDOW w1 AS ( +-- PARTITION BY sl.node_set_id, result.na_feature_id +-- ORDER BY result.value ASC +-- ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING +-- ) +-- ) +-- SELECT sl.node_set_id, result.na_feature_id, 'values' as profile_type, s.node_type, +-- string_agg(coalesce(result.value, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, +-- min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint +-- FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.HaplotypeResult result, min_max +-- WHERE result.protocol_app_node_id = sl.protocol_app_node_id +-- AND result.protocol_app_node_id = pan.protocol_app_node_id +-- AND sl.node_set_id = s.node_set_id +-- AND min_max.na_feature_id = result.na_feature_id +-- AND min_max.node_set_id = sl.node_set_id +-- GROUP BY sl.node_set_id, result.na_feature_id, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type +-- ) t16 + ) gene_profile + WHERE ga.na_feature_id = gene_profile.na_feature_id + UNION ALL + -- compound profiles + SELECT compound_profile.node_set_id, + case + WHEN compound_profile.isotopomer IS NOT NULL + THEN ca.source_id || '|' || 
compound_profile.isotopomer + WHEN compound_profile.mass IS NOT NULL + THEN ca.source_id || '|' || compound_profile.mass || '|' || compound_profile.retention_time + ELSE ca.source_id + END as source_id, + compound_profile.profile_type, compound_profile.node_type, + compound_profile.profile_as_string, compound_profile.max_value, compound_profile.min_value, + compound_profile.max_timepoint, compound_profile.min_timepoint + FROM CompoundAttributes ca, + ( + SELECT * FROM ( + WITH result AS ( + SELECT max(value) as value, compound_id, protocol_app_node_id, isotopomer + FROM results.CompoundMassSpec + GROUP BY compound_id, protocol_app_node_id, isotopomer + ) , min_max AS ( + SELECT DISTINCT sl.node_set_id, result.compound_id, result.isotopomer, + last_value(result.value) over w1 as max_value, + first_value(result.value) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.compound_id, result.isotopomer + ORDER BY result.value ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.compound_id, result.isotopomer, 'values' as profile_type, s.node_type, + NULL::numeric as compound_peaks_id, NULL::numeric as mass, NULL::numeric as retention_time, + string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, result, min_max + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND 
sl.node_set_id = s.node_set_id + AND min_max.compound_id = result.compound_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.compound_id, result.isotopomer, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t1 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, cpc.compound_id, + last_value(cmsr.value) over w1 as max_value, + first_value(cmsr.value) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, + apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp, + apidb.CompoundPeaksChebi cpc + WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id + AND cmsr.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND cp.compound_peaks_id = cmsr.compound_peaks_id + AND cpc.compound_peaks_id = cp.compound_peaks_id + AND pan.name like '%mean%' + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time + ORDER BY cmsr.value ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, cpc.compound_id, cpc.isotopomer, 'values' as profile_type, s.node_type, + cpc.compound_peaks_id, cp.mass, cp.retention_time, + string_agg(coalesce(round(cmsr.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, + apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp, + apidb.CompoundPeaksChebi cpc, min_max + WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id + AND cmsr.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND cp.compound_peaks_id = 
cmsr.compound_peaks_id + AND cpc.compound_peaks_id = cp.compound_peaks_id + AND pan.name like '%mean%' + AND min_max.compound_id = cpc.compound_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t2 + UNION ALL + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, cpc.compound_id, + last_value(cmsr.percentile) over w1 as max_value, + first_value(cmsr.percentile) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, + apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp, + apidb.CompoundPeaksChebi cpc + WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id + AND cmsr.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND cp.compound_peaks_id = cmsr.compound_peaks_id + AND cpc.compound_peaks_id = cp.compound_peaks_id + AND pan.name like '%mean%' + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time + ORDER BY cmsr.percentile ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, cpc.compound_id, cpc.isotopomer, 'percentiles' as profile_type, s.node_type, + cpc.compound_peaks_id, cp.mass, cp.retention_time, + string_agg(coalesce(round(cmsr.percentile::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, + apidb.CompoundMassSpecResult cmsr, apidb.Compoundpeaks cp, + apidb.CompoundPeaksChebi cpc, min_max + WHERE cmsr.protocol_app_node_id = sl.protocol_app_node_id + 
AND cmsr.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND cp.compound_peaks_id = cmsr.compound_peaks_id + AND cpc.compound_peaks_id = cp.compound_peaks_id + AND pan.name like '%mean%' + AND min_max.compound_id = cpc.compound_id + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, cpc.compound_id, cpc.isotopomer, cpc.compound_peaks_id, cp.mass, cp.retention_time, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t3 + ) compound_profile + WHERE ca.id = compound_profile.compound_id + UNION ALL + -- OntologyTermResult + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, ot.name, + last_value(otr.value) over w1 as max_value, + first_value(otr.value) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, + apidb.OntologyTermResult otr, sres.OntologyTerm ot + WHERE ot.ontology_term_id = otr.ontology_term_id + AND otr.protocol_app_node_id = sl.protocol_app_node_id + AND otr.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, ot.name + ORDER BY otr.value ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, ot.name as source_id, 'value' as profile_type, s.node_type, + string_agg(coalesce(round(otr.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, + apidb.OntologyTermResult otr, sres.OntologyTerm ot, min_max + WHERE ot.ontology_term_id = otr.ontology_term_id + AND otr.protocol_app_node_id = sl.protocol_app_node_id + AND otr.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = 
s.node_set_id + AND min_max.name = ot.name + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, ot.name, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t1 + UNION ALL + -- SubjectResult + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.subject, + last_value(result.value) over w1 as max_value, + first_value(result.value) over w1 as min_value, + last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.SubjectResult result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.subject + ORDER BY result.value ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.subject as source_id, 'values' as profile_type, s.node_type, + string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, min_max, apidb.SubjectResult result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.subject = result.subject + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.subject, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t2 + UNION ALL + -- wgcna eigengene results + SELECT * FROM ( + WITH min_max AS ( + SELECT DISTINCT sl.node_set_id, result.module_name, + last_value(result.value) over w1 as max_value, + first_value(result.value) over w1 as min_value, + 
last_value(pan.name) over w1 as max_timepoint, + first_value(pan.name) over w1 as min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, apidb.EigenGeneWgcnaResults result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + WINDOW w1 AS ( + PARTITION BY sl.node_set_id, result.module_name + ORDER BY result.value ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ) + SELECT sl.node_set_id, result.module_name as source_id, 'values' as profile_type, s.node_type, + string_agg(coalesce(round(result.value::numeric, 2)::varchar, 'NA'), chr(9) order by pan.node_order_num) as profile_as_string, + min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint + FROM study.ProtocolAppNode pan, study.NodeNodeSet sl, study.NodeSet s, min_max, apidb.EigenGeneWgcnaResults result + WHERE result.protocol_app_node_id = sl.protocol_app_node_id + AND result.protocol_app_node_id = pan.protocol_app_node_id + AND sl.node_set_id = s.node_set_id + AND min_max.module_name = result.module_name + AND min_max.node_set_id = sl.node_set_id + GROUP BY sl.node_set_id, result.module_name, min_max.max_value, min_max.min_value, min_max.max_timepoint, min_max.min_timepoint, s.node_type + ) t3 + ) profile + WHERE ds.name = d.name + AND ds.version = r.version + AND d.external_database_id = r.external_database_id + AND profile.node_set_id = ps.node_set_id + AND ps.external_database_release_id = r.external_database_release_id + ) + LOOP + ctrows := ctrows + 1; + INSERT INTO Profile + (DATASET_NAME, DATASET_TYPE, DATASET_SUBTYPE, PROFILE_TYPE, NODE_TYPE, SOURCE_ID, PROFILE_STUDY_ID, PROFILE_SET_NAME, + PROFILE_SET_SUFFIX, PROFILE_AS_STRING, MAX_VALUE, MIN_VALUE, MAX_TIMEPOINT, MIN_TIMEPOINT) + VALUES + (pf_rows.DATASET_NAME, pf_rows.DATASET_TYPE, pf_rows.DATASET_SUBTYPE, pf_rows.PROFILE_TYPE, pf_rows.NODE_TYPE, pf_rows.SOURCE_ID, 
pf_rows.PROFILE_STUDY_ID, pf_rows.PROFILE_SET_NAME, + pf_rows.PROFILE_SET_SUFFIX, pf_rows.PROFILE_AS_STRING, pf_rows.MAX_VALUE, pf_rows.MIN_VALUE, pf_rows.MAX_TIMEPOINT, pf_rows.MIN_TIMEPOINT); + IF ctrows >= commit_after THEN + COMMIT; + ctrows := 0; + END IF; + END LOOP; + commit; + END; + $$ LANGUAGE PLPGSQL; + + ; + + + + /* Relabel one dataset in Profile: rows stored under the RNASeq_Rijo_Circadian_Regulation name get the Rijo_Circadian_Regulation_rnaSeq name. */ UPDATE Profile + SET dataset_name = 'tbruTREU927_Rijo_Circadian_Regulation_rnaSeq_RSRC' + WHERE dataset_name= 'tbruTREU927_RNASeq_Rijo_Circadian_Regulation_RSRC' + + ; + diff --git a/Model/lib/psql/webtables/MO/ProfileSamples.psql b/Model/lib/psql/webtables/MO/ProfileSamples.psql new file mode 100644 index 0000000000..e2ed3d0b2a --- /dev/null +++ b/Model/lib/psql/webtables/MO/ProfileSamples.psql @@ -0,0 +1,167 @@ +:CREATE_AND_POPULATE + + + /* ProfileSamples: distinct (study, sample node, profile type) rows. Each UNION branch joins profileType to study.nodeSet, study.nodeNodeSet and study.protocolAppNode and keeps only nodes that have rows in one result table (aliased r); some branches also exclude a list of profile types. The nested REGEXP_REPLACE removes a bracketed-plus-parenthesised segment, then a parenthesised segment, from the node name. */ CREATE TABLE ProfileSamples AS + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id as study_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, results.nafeatureexpression r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + AND pt.profile_type not in ('pvalue', 'period', 'amplitude', 'probability_mean','sd','lower_CI','upper_CI','correlation_coefficient') + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + 
study.protocolAppNode pan, apidb.NAFeatureMetacycle r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + AND pt.profile_type not in ('values', 'channel1_percentiles', 'channel2_percentiles', 'standard_error') + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, apidb.LopitResults r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + AND pt.profile_type not in ('values', 'channel1_percentiles', 'channel2_percentiles', 'standard_error') + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, results.compoundMassSpec r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + 
pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, APIDB.compoundmassspecresult r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + and pan.name like '%mean%' + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, apidb.ontologytermresult r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, results.nafeaturehostresponse r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, 
pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, apidb.crisprphenotype r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, apidb.phenotypescore r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, apidb.phenotypegrowthrate r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, 
study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, apidb.subjectresult r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + UNION + SELECT DISTINCT s.name AS study_name, pt.node_type, s.node_set_id, + REGEXP_REPLACE(REGEXP_REPLACE (pan.name, ' \[.+\] \(.+\)', ''), + ' \(.+\)', '')AS protocol_app_node_name, + pan.protocol_app_node_id, pan.node_order_num, pt.profile_type, + pt.dataset_name, pt.dataset_type, pt.dataset_subtype, + pt.profile_set_suffix + FROM profileType pt, study.nodeSet s, study.nodeNodeSet sl, + study.protocolAppNode pan, apidb.EigenGeneWgcnaResults r + WHERE pt.profile_study_id = s.node_set_id + AND sl.node_set_id = s.node_set_id + AND sl.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id =r.protocol_app_node_id + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql new file mode 100644 index 0000000000..f9f068e65a --- /dev/null +++ b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql @@ -0,0 +1,22 @@ + + + /* Index on ProfileSamples led by dataset_name, profile_type; it lists every column of the table except dataset_type and dataset_subtype. */ create index psamp_ix + on ProfileSamples + (dataset_name, profile_type, study_id, node_order_num, + protocol_app_node_id, profile_set_suffix, study_name, + node_type, protocol_app_node_name) + + + ; + + + + /* Same nine columns as psamp_ix, reordered to lead with study_name, node_type, profile_type. */ create index psampstdy_ix + on ProfileSamples + (study_name, node_type, profile_type, node_order_num, + protocol_app_node_id, profile_set_suffix, study_id, + protocol_app_node_name, dataset_name) + + + ; + diff --git a/Model/lib/psql/webtables/MO/ProfileType.psql b/Model/lib/psql/webtables/MO/ProfileType.psql new file mode 100644 index 0000000000..0f0f83616c --- /dev/null +++ b/Model/lib/psql/webtables/MO/ProfileType.psql @@ -0,0 +1,13 @@ +:CREATE_AND_POPULATE + + + /* ProfileType: distinct dataset, study, profile-set, node-type and profile-type combinations taken from profile. */ CREATE TABLE ProfileType AS + SELECT DISTINCT dataset_name, profile_study_id, profile_set_name, 
profile_set_suffix, node_type, profile_type, + dataset_type, dataset_subtype + FROM profile + WHERE profile_as_string IS NOT NULL + ORDER BY dataset_name, profile_set_name, profile_type + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/ProfileType_ix.psql b/Model/lib/psql/webtables/MO/ProfileType_ix.psql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Model/lib/psql/webtables/MO/Profile_ix.psql b/Model/lib/psql/webtables/MO/Profile_ix.psql new file mode 100644 index 0000000000..25c977a2e7 --- /dev/null +++ b/Model/lib/psql/webtables/MO/Profile_ix.psql @@ -0,0 +1,24 @@ + + + /* Profile lookup by source_id, then profile_type and profile_set_name. */ create index exprof_idx + on Profile (source_id, profile_type, profile_set_name) + + + ; + + + + /* Profile lookup by profile_set_name and profile_type. */ create index profset_idx + on Profile (profile_set_name, profile_type) + + + ; + + + + /* Profile lookup by source_id, then dataset_subtype and dataset_type. */ create index srcdset_idx + on Profile (source_id, dataset_subtype, dataset_type) + + + ; + diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webtables/MO/ProteinAttributes.psql new file mode 100644 index 0000000000..de8bb59b53 --- /dev/null +++ b/Model/lib/psql/webtables/MO/ProteinAttributes.psql @@ -0,0 +1,207 @@ + + + /* GoTermList: per aa_sequence_id, ontology and source, the GO term names and GO ids joined with semicolons, both ordered by go_term_name. source is predicted when evidence_code is IEA, otherwise annotated. */ CREATE TABLE :ORG_ABBREVGoTermList AS + SELECT aa_sequence_id, ontology, source, + string_agg(go_term_name, ';' ORDER BY go_term_name) AS go_terms, + string_agg(go_id, ';' ORDER BY go_term_name) AS go_ids + FROM ( + SELECT aa_sequence_id, ontology, + CASE evidence_code WHEN 'IEA' THEN 'predicted' ELSE 'annotated' END AS source, go_term_name, go_id + FROM :ORG_ABBREVGeneGoTerms + ) t + GROUP BY aa_sequence_id, ontology, source + + ; + + + + /* ProteinGoAttributes: one output column per source and ontology pair, filled from GoTermList through LEFT JOINs and truncated to 300 characters. */ CREATE TABLE :ORG_ABBREVProteinGoAttributes AS + SELECT DISTINCT gts.aa_sequence_id, + substr(annotated_go_component.go_terms, 1, 300) AS annotated_go_component, + substr(annotated_go_function.go_terms, 1, 300) AS annotated_go_function, + substr(annotated_go_process.go_terms, 1, 300) AS annotated_go_process, + substr(predicted_go_component.go_terms, 1, 300) AS predicted_go_component, + 
substr(predicted_go_function.go_terms, 1, 300) AS predicted_go_function, + substr(predicted_go_process.go_terms, 1, 300) AS predicted_go_process, + substr(annotated_go_component.go_ids, 1, 300) AS annotated_go_id_component, + substr(annotated_go_function.go_ids, 1, 300) AS annotated_go_id_function, + substr(annotated_go_process.go_ids, 1, 300) AS annotated_go_id_process, + substr(predicted_go_component.go_ids, 1, 300) AS predicted_go_id_component, + substr(predicted_go_function.go_ids, 1, 300) AS predicted_go_id_function, + substr(predicted_go_process.go_ids, 1, 300) AS predicted_go_id_process + FROM + (SELECT DISTINCT aa_sequence_id FROM :ORG_ABBREVGoTermSummary) gts + LEFT JOIN ( + SELECT * FROM :ORG_ABBREVGoTermList + WHERE source = 'annotated' AND ontology = 'Cellular Component' + ) annotated_go_component ON + gts.aa_sequence_id = annotated_go_component.aa_sequence_id + AND 'annotated' = annotated_go_component.source + AND 'Cellular Component' = annotated_go_component.ontology + LEFT JOIN ( + SELECT * FROM :ORG_ABBREVGoTermList + WHERE source = 'annotated' AND ontology = 'Molecular Function' + ) annotated_go_function ON + gts.aa_sequence_id = annotated_go_function.aa_sequence_id + AND 'annotated' = annotated_go_function.source + AND 'Molecular Function' = annotated_go_function.ontology + LEFT JOIN ( + SELECT * FROM :ORG_ABBREVGoTermList + WHERE source = 'annotated' AND ontology = 'Biological Process' + ) annotated_go_process ON + gts.aa_sequence_id = annotated_go_process.aa_sequence_id + AND 'annotated' = annotated_go_process.source + AND 'Biological Process' = annotated_go_process.ontology + LEFT JOIN ( + SELECT * FROM :ORG_ABBREVGoTermList + WHERE source = 'predicted' AND ontology = 'Cellular Component' + ) predicted_go_component ON + gts.aa_sequence_id = predicted_go_component.aa_sequence_id + AND 'predicted' = predicted_go_component.source + AND 'Cellular Component' = predicted_go_component.ontology + LEFT JOIN ( + SELECT * FROM :ORG_ABBREVGoTermList + 
WHERE source = 'predicted' AND ontology = 'Molecular Function' + ) predicted_go_function ON + gts.aa_sequence_id = predicted_go_function.aa_sequence_id + AND 'predicted' = predicted_go_function.source + AND 'Molecular Function' = predicted_go_function.ontology + LEFT JOIN ( + SELECT * FROM :ORG_ABBREVGoTermList + WHERE source = 'predicted' AND ontology = 'Biological Process' + ) predicted_go_process ON + gts.aa_sequence_id = predicted_go_process.aa_sequence_id + AND 'predicted' = predicted_go_process.source + AND 'Biological Process' = predicted_go_process.ontology + + ; + + + + /* Index for joining ProteinGoAttributes on aa_sequence_id. */ create index ProteinGoAttr_aaSequenceId ON :ORG_ABBREVProteinGoAttributes (aa_sequence_id) + + + ; + + + + /* EC numbers per protein, excluding OrthoMCLDerived evidence: distinct ec_number (description) strings joined with semicolons and cut to 300 characters. The table name must match the LEFT JOIN in the ProteinAttributes query. coalesce keeps the EC number when description is NULL, because string concatenation returns NULL if any operand is NULL. */ CREATE TABLE :ORG_ABBREVProteinAttrsEc AS + SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers + FROM (SELECT DISTINCT asec.aa_sequence_id, + ec.ec_number || ' (' || coalesce(ec.description, '') || ')' AS ec_number + FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec + WHERE ec.enzyme_class_id = asec.enzyme_class_id + AND NOT asec.evidence_code = 'OrthoMCLDerived' + ) t + GROUP BY aa_sequence_id + + ; + + + + /* Same aggregation restricted to OrthoMCLDerived evidence; the name again matches the LEFT JOIN in the ProteinAttributes query. */ CREATE TABLE :ORG_ABBREVProteinAttrsEcDerived AS + SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived + FROM (SELECT DISTINCT asec.aa_sequence_id, + ec.ec_number || ' (' || coalesce(ec.description, '') || ')' AS ec_number + FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec + WHERE ec.enzyme_class_id = asec.enzyme_class_id + AND asec.evidence_code = 'OrthoMCLDerived' + ) t + GROUP BY aa_sequence_id + + ; + +:CREATE_AND_POPULATE + + + /* ProteinAttributes: one row per translated protein sequence, joined to its transcript, gene, CDS location and the annotation lookup tables built by the preceding statements. */ CREATE TABLE :ORG_ABBREVProteinAttributes AS + SELECT pi.name as project_id, + tas.source_id, tas.aa_sequence_id, + t.source_id as transcript_source_id, + gf.source_id as gene_source_id, + cdsl.na_sequence_id as na_sequence_id, + cdsl.is_reversed, + cdsl.start_min as cds_start, + cdsl.end_max as cds_end, + (taf.translation_stop - taf.translation_start) + 1 AS 
cds_length, + length(tas.sequence) AS protein_length, + coalesce(transmembrane.tm_domains, 0) AS tm_count, + tas.molecular_weight, + asa.min_molecular_weight, asa.max_molecular_weight, + asa.isoelectric_point, asa.hydropathicity_gravy_score, + asa.aromaticity_score, + SUBSTR(sigp.peptide_sequence, 1, 200) as signalp_peptide, + ec_numbers, + ec_numbers_derived, + go.annotated_go_component, + go.annotated_go_function, + go.annotated_go_process, + go.predicted_go_component, + go.predicted_go_function, + go.predicted_go_process, + go.annotated_go_id_component, + go.annotated_go_id_function, + go.annotated_go_id_process, + go.predicted_go_id_component, + go.predicted_go_id_function, + go.predicted_go_id_process, + SUBSTR(coalesce(rt1.anticodon, rt2.anticodon), 1, 3) AS anticodon, + 0 AS has_seqedit, + row_number() over (partition by t.source_id order by tas.length desc) as rank_in_transcript, + uniprot.uniprot_ids + FROM + core.ProjectInfo pi + INNER JOIN dots.Transcript t ON t.row_project_id = pi.project_id + INNER JOIN dots.GeneFeature gf ON gf.na_feature_id = t.parent_id + INNER JOIN dots.TranslatedAaFeature taf ON t.na_feature_id = taf.na_feature_id + INNER JOIN dots.TranslatedAaSequence tas ON taf.aa_sequence_id = tas.aa_sequence_id + LEFT JOIN dots.RnaType rt2 ON gf.na_feature_id = rt2.parent_id + LEFT JOIN dots.RnaType rt1 ON t.na_feature_id = rt1.parent_id + LEFT JOIN apidb.AaSequenceAttribute asa ON taf.aa_sequence_id = asa.aa_sequence_id + LEFT JOIN :ORG_ABBREVProteinGoAttributes go ON tas.aa_sequence_id = go.aa_sequence_id + LEFT JOIN ( + SELECT aa_sequence_id, string_agg(peptide_sequence, ', ') peptide_sequence + FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :ORG_ABBREVSignalPeptideDomains) t + GROUP BY aa_sequence_id + ) sigp ON tas.aa_sequence_id = sigp.aa_sequence_id + LEFT JOIN ( + SELECT protein_source_id, na_sequence_id, is_reversed, + MIN(start_min) AS start_min, MAX(end_max) AS end_max + FROM apidb.CdsLocation WHERE is_top_level=1 + 
GROUP BY protein_source_id, na_sequence_id, is_reversed + ) cdsl ON tas.source_id = cdsl.protein_source_id + LEFT JOIN ( + SELECT aa_sequence_id, max(tm_domains) AS tm_domains + FROM (SELECT tmaf.aa_sequence_id, COUNT(*) AS tm_domains + FROM dots.TransmembraneAaFeature tmaf, dots.AaLocation al + WHERE tmaf.aa_feature_id = al.aa_feature_id + GROUP BY tmaf.aa_sequence_id) tms + GROUP BY tms.aa_sequence_id + ) transmembrane ON tas.aa_sequence_id = transmembrane.aa_sequence_id + LEFT JOIN :ORG_ABBREVProteinAttrsEc ec ON tas.aa_sequence_id = ec.aa_sequence_id + LEFT JOIN :ORG_ABBREVProteinAttrsEcDerived ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id + LEFT JOIN ( + SELECT af.aa_sequence_id, + string_agg(dbref.primary_identifier, ',' order by dbref.primary_identifier) as uniprot_ids + FROM sres.ExternalDatabase d, sres.ExternalDatabaseRelease r, + sres.DbRef, dots.DbRefAaFeature daf, dots.AaFeature af + WHERE d.name like 'Uniprot%' + AND d.external_database_id = r.external_database_id + AND r.external_database_release_id = dbref.external_database_release_id + AND dbref.db_ref_id = daf.db_ref_id + AND daf.aa_feature_id = af.aa_feature_id + GROUP BY af.aa_sequence_id + ) uniprot ON tas.aa_sequence_id = uniprot.aa_sequence_id + ORDER BY tas.source_id + + +:DECLARE_PARTITION; + + + + /* Flag proteins whose source_id appears in apidb.seqedit; has_seqedit is created as 0 for every row by the CREATE TABLE statement. */ update :ORG_ABBREVProteinAttributes gaup + set has_seqedit = 1 + where source_id in (select source_id from apidb.seqedit) + + ; + diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql new file mode 100644 index 0000000000..f9899e7f80 --- /dev/null +++ b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql @@ -0,0 +1,14 @@ + + + /* Lookup index on source_id. */ CREATE INDEX PA_sourceId ON :ORG_ABBREVProteinAttributes (source_id) + + + ; + + + + /* Lookup index on aa_sequence_id. */ CREATE INDEX PA_aaSequenceId ON :ORG_ABBREVProteinAttributes (aa_sequence_id) + + + ; + diff --git a/Model/lib/psql/webtables/MO/ProteinSequence.psql b/Model/lib/psql/webtables/MO/ProteinSequence.psql new 
file mode 100644 index 0000000000..937e695a3f --- /dev/null +++ b/Model/lib/psql/webtables/MO/ProteinSequence.psql @@ -0,0 +1,15 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE ProteinSequence AS + WITH pAttr AS ( + SELECT distinct source_id, aa_sequence_id + FROM ProteinAttributes) + SELECT pa.source_id, pi.name AS project_id, tas.sequence + FROM pAttr pa, dots.TranslatedAaSequence tas, core.Projectinfo pi + WHERE pa.aa_sequence_id = tas.aa_sequence_id + AND pi.project_id = tas.row_project_id + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql new file mode 100644 index 0000000000..98f1c06b2d --- /dev/null +++ b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql @@ -0,0 +1,7 @@ + + + create index ProtSeq_ix on ProteinSequence (source_id, project_id) + + + ; + diff --git a/Model/lib/psql/webtables/MO/RnaSeqStats.psql b/Model/lib/psql/webtables/MO/RnaSeqStats.psql new file mode 100644 index 0000000000..33964a796c --- /dev/null +++ b/Model/lib/psql/webtables/MO/RnaSeqStats.psql @@ -0,0 +1,55 @@ +:CREATE_AND_POPULATE + + + create table RnaSeqStats as + select study_id, study_name, dataset_name, taxon_id, round(avg(num_reads::integer),0) as avg_unique_reads + from (select sl.node_set_id as study_id + , s.name || '[' || s.node_type || ']' as study_name + , ed.name as dataset_name + , ds.taxon_id + , c.value as num_reads + from apidb.Datasource ds + , sres.ExternalDatabase ed + , sres.ExternalDatabaseRelease edr + , study.nodeSet s, study.nodeNodeSet sl + , study.ProtocolAppNode pan + , study.Characteristic c + , sres.OntologyTerm ot + where ds.external_database_name = ed.name + and ed.external_database_id = edr.external_database_id + and edr.external_database_release_id = s.external_database_release_id + and sl.node_set_id = s.node_set_id + and sl.protocol_app_node_id = pan.protocol_app_node_id + and pan.protocol_app_node_id = c.protocol_app_node_id + and c.qualifier_id = 
ot.ontology_term_id + and (ot.source_id = 'EUPATH_0000460' or ot.source_id = 'EuPathUserDefined_00507') + ) subquery1 + group by study_id, study_name, dataset_name, taxon_id + union + select study_id, study_name, dataset_name, taxon_id, round(2*avg(num_reads::integer),0) as avg_unique_reads + from (select sl.node_set_id as study_id + , s.name || '[' || s.node_type || ']' as study_name + , ed.name as dataset_name + , ds.taxon_id + , c.value as num_reads + from apidb.Datasource ds + , sres.ExternalDatabase ed + , sres.ExternalDatabaseRelease edr + , study.nodeSet s, study.nodeNodeSet sl + , study.ProtocolAppNode pan + , study.Characteristic c + , sres.OntologyTerm ot + where ds.external_database_name = ed.name + and ed.external_database_id = edr.external_database_id + and edr.external_database_release_id = s.external_database_release_id + and sl.node_set_id = s.node_set_id + and sl.protocol_app_node_id = pan.protocol_app_node_id + and pan.protocol_app_node_id = c.protocol_app_node_id + and c.qualifier_id = ot.ontology_term_id + and (ot.source_id = 'EUPATH_0000468' or ot.source_id = 'EuPathUserDefined_00515' or ot.source_id = 'EUPATH_0000476' or ot.source_id = 'EuPathUserDefined_00523') + ) subquery2 + group by study_id, study_name, dataset_name, taxon_id + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/RnaSeqStats_ix.psql b/Model/lib/psql/webtables/MO/RnaSeqStats_ix.psql new file mode 100644 index 0000000000..e69de29bb2 diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql new file mode 100644 index 0000000000..80e45141ca --- /dev/null +++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql @@ -0,0 +1,41 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE :ORG_ABBREVSignalPeptideDomains AS + SELECT + gf.source_id gene_source_id + , t.source_id transcript_source_id + , taf.na_feature_id + , spf.aa_feature_id + , spf.aa_sequence_id + , spf.parent_id + , aal.start_min + , aal.end_max + , 
spf.algorithm_name
+       , substr(s.sequence, 1, aal.end_max::INTEGER) peptide_sequence -- residues 1..end_max of the protein; substr() takes (string, start, length)
+  FROM
+         dots.SignalPeptideFeature spf
+       , dots.AaLocation aal
+       , dots.TranslatedAaFeature taf
+       , dots.TranslatedAaSequence tas
+       , dots.GeneFeature gf
+       , dots.AaSequence s
+       , dots.Transcript t
+  WHERE
+        spf.aa_sequence_id = s.aa_sequence_id
+    AND aal.aa_feature_id = spf.aa_feature_id
+    AND t.na_feature_id = taf.na_feature_id
+    AND taf.aa_sequence_id = tas.aa_sequence_id
+    AND tas.aa_sequence_id = spf.aa_sequence_id
+    AND gf.na_feature_id = t.parent_id
+    AND (spf.signal_probability >= .5 -- a feature is kept when any one of these OR-ed criteria holds
+         OR spf.signal_probability IS NULL
+         OR ((spf.means_score + spf.maxy_score) / 2) >= .5
+         OR ( spf.maxy_conclusion + spf.maxc_conclusion + spf.maxs_conclusion + spf.means_conclusion ) >= 3
+        )
+  ORDER BY
+        spf.aa_sequence_id, spf.aa_feature_id
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
new file mode 100644
index 0000000000..b305c86713
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql
@@ -0,0 +1,16 @@
+
+
+  CREATE INDEX SignalP1_ix
+    ON :ORG_ABBREVSignalPeptideDomains (aa_sequence_id)
+
+
+  ;
+
+
+
+  CREATE INDEX SignalP2_ix
+    ON :ORG_ABBREVSignalPeptideDomains (gene_source_id, transcript_source_id, end_max)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/TFBSGene.psql b/Model/lib/psql/webtables/MO/TFBSGene.psql
new file mode 100644
index 0000000000..3a7103d8ac
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TFBSGene.psql
@@ -0,0 +1,46 @@
+:CREATE_AND_POPULATE
+
+
+  CREATE TABLE TFBSGene AS
+  SELECT DISTINCT
+         ga.source_id as gene_source_id,
+         ga.organism as organism,
+         ga.genus_species as species,
+         aef.source_id as probe_id,
+         aef.na_feature_id as tfbs_na_feature_id,
+         CASE
+           WHEN ga.is_reversed = 0
+           THEN round(abs(ga.start_min - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min)),0)
+           ELSE round(abs(ga.end_max - (((arrloc.end_max -
arrloc.start_min) / 2) + arrloc.start_min)),0) + END as distance, + CASE + WHEN /* distance > 0 */ + CASE WHEN ga.is_reversed = 0 + THEN ga.start_min - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) + ELSE ga.end_max - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) + END > 0 + THEN + CASE + WHEN ga.is_reversed = 0 + THEN '-' + ELSE '+' + END + ELSE + CASE + WHEN ga.is_reversed = 1 + THEN '-' + ELSE '+' + END + END as direction, + aef.* + FROM dots.BindingSiteFeature aef, + apidb.FeatureLocation arrloc, + GeneAttributes ga + WHERE aef.na_feature_id = arrloc.na_feature_id + AND arrloc.na_sequence_id = ga.na_sequence_id + AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000) + or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) ) + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql new file mode 100644 index 0000000000..c350a43aa8 --- /dev/null +++ b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql @@ -0,0 +1,14 @@ + + + create index tfbs_geneid_idx ON TFBSGene (gene_source_id, tfbs_na_feature_id) + + + ; + + + + create index geneid_tfbs_idx ON TFBSGene (tfbs_na_feature_id,gene_source_id) + + + ; + diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies.psql b/Model/lib/psql/webtables/MO/TaxonSpecies.psql new file mode 100644 index 0000000000..7ef89cd520 --- /dev/null +++ b/Model/lib/psql/webtables/MO/TaxonSpecies.psql @@ -0,0 +1,35 @@ + + + CREATE UNLOGGED TABLE :ORG_ABBREVTaxonOfInterest AS + SELECT taxon_id + FROM :ORG_ABBREVGenomicSeqAttributes + UNION + SELECT ns.taxon_id + FROM dots.NaSequence ns, dots.Est + WHERE est.na_sequence_id = ns.na_sequence_id + + ; + +:CREATE_AND_POPULATE + + + CREATE TABLE :ORG_ABBREVTaxonSpecies as + -- recursively walk taxon tree to find ancestor with rank "species" + -- Update this to select 
max/min level with rank species if there are multiple
+  WITH RECURSIVE cte AS (
+    SELECT TAXON_ID, taxon_id as parent_id, 1 as lvl
+    FROM sres.taxon
+    WHERE taxon_id IN (SELECT taxon_id FROM :ORG_ABBREVTaxonofinterest)
+    UNION ALL
+    SELECT cte.taxon_id, sub.parent_id, lvl + 1
+    FROM cte, sres.taxon sub
+    WHERE cte.parent_id = sub.taxon_id
+  )
+  SELECT c.taxon_id, c.parent_id as species_taxon_id
+  FROM cte c, sres.taxon t
+  WHERE t.taxon_id = c.parent_id
+    AND t.rank='species'
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies_ix.psql b/Model/lib/psql/webtables/MO/TaxonSpecies_ix.psql
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Model/lib/psql/webtables/MO/Taxonomy.psql b/Model/lib/psql/webtables/MO/Taxonomy.psql
new file mode 100644
index 0000000000..249eda7162
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/Taxonomy.psql
@@ -0,0 +1,28 @@
+:CREATE_AND_POPULATE
+
+
+  CREATE TABLE Taxonomy as
+  WITH RECURSIVE cte AS ( -- starts at each GeneAttributes organism and follows parent_id upward, accumulating the taxon_id path
+    WITH tax AS(
+      SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id,
+             cast(tn.name as varchar(80)) as name,
+             cast(t.rank as varchar(24)) as rank
+      FROM sres.Taxon t, sres.TaxonName tn
+      WHERE t.taxon_id = tn.taxon_id
+        AND tn.name_class = 'scientific name'
+    )
+    SELECT tax.*, name as organism, ARRAY[taxon_id::numeric] as path
+    FROM tax
+    WHERE name IN (SELECT DISTINCT organism FROM GeneAttributes)
+    UNION
+    SELECT tax.*, cte.organism, cte.path || tax.taxon_id as path
+    FROM tax, cte
+    WHERE cte.parent_id = tax.taxon_id
+      AND tax.name != 'root'
+  )
+  SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over (ORDER BY path) as orderNum
+  FROM cte -- orderNum is ordered by the window's own ORDER BY, not by the row order of a sorted subquery
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
new file mode 100644
index 0000000000..84083eda42
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql
@@ -0,0 +1,9 @@
+
+
+  create index tax_ix
+    on Taxonomy
+    (organism, ordernum,
taxon_id, parent_id, ncbi_tax_id, name, rank) + + + ; + diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql new file mode 100644 index 0000000000..388d722537 --- /dev/null +++ b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql @@ -0,0 +1,440 @@ + + + CREATE table :ORG_ABBREVTranscriptUniprot AS + select na_feature_id, + substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id, + substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal + FROM (SELECT DISTINCT t.na_feature_id, dr.primary_identifier as uniprot_id + FROM sres.DbRef dr, dots.DbRefNaFeature x, dots.Transcript t, + sres.ExternalDatabase d, sres.ExternalDatabaseRelease r + WHERE dr.db_ref_id = x.DB_REF_ID + AND (x.na_feature_id = t.na_feature_id -- + or x.na_feature_id = t.parent_id) + AND dr.external_database_release_id = r.external_database_release_id + AND r.external_database_id = d.external_database_id + AND (d.name like '%uniprot_dbxref_RSRC' + OR d.name like '%dbxref_gene2Uniprot_RSRC' + OR d.name = 'Links to Uniprot Genes' + OR d.name like '%_dbxref_uniprot_linkout_RSRC' + OR d.name like '%_dbxref_uniprot_from_annotation_RSRC') + ) t + GROUP BY na_feature_id + + ; + +:CREATE_AND_POPULATE + + + CREATE TABLE :ORG_ABBREVTranscriptAttributes AS + WITH genefeat AS ( + SELECT DISTINCT + cast(apidb.prefixed_project_id(tn.name, ':ORG_ABBREV') as varchar(20)) as project_id, + -- first the gene attributes: + gf.source_id AS gene_source_id, + gf.na_feature_id AS gene_na_feature_id, + LEAST(nl.start_min, nl.end_max) AS gene_start_min, + GREATEST(nl.start_min, nl.end_max) AS gene_end_max, + COALESCE(preferred_name.name, any_name.name) AS gene_name, + cast(coalesce(preferred_gene_product.product, any_gene_product.product, gf.product) + as VARCHAR(300)) as old_gene_product, + COALESCE(gp.product, 'unspecified product') as gene_product, + REPLACE(so.name, '_', ' ') AS gene_type, + 
gf.name as gene_ebi_biotype, + gi.gene_id, + transcripts.gene_transcript_count, + exons.gene_exon_count, + olds.old_ids AS gene_previous_ids, + coalesce(deprecated.is_deprecated, 0) as is_deprecated, + GREATEST(1, least(nl.start_min, nl.end_max) - 15000) AS gene_context_start, + LEAST(gsa.length, greatest(nl.start_min, nl.end_max) + 15000) AS gene_context_end, + GREATEST(1, least(nl.start_min, nl.end_max) - 1500) AS gene_zoom_context_start, + LEAST(gsa.length, greatest(nl.start_min, nl.end_max) + 1500) AS gene_zoom_context_end, + CAST(orthologs.name AS VARCHAR(60)) AS orthomcl_name, + coalesce(tothtssnps.total_hts_snps,0) AS gene_total_hts_snps, + coalesce(tothtssnps.hts_nonsynonymous_snps,0) AS gene_hts_nonsynonymous_snps, + coalesce(tothtssnps.hts_stop_codon_snps,0) AS gene_hts_stop_codon_snps, + coalesce(tothtssnps.hts_noncoding_snps,0) AS gene_hts_noncoding_snps, + coalesce(tothtssnps.hts_synonymous_snps,0) AS gene_hts_synonymous_snps, + coalesce(tothtssnps.hts_nonsyn_syn_ratio,0) AS gene_hts_nonsyn_syn_ratio, + CAST(cmnt.comment_string AS VARCHAR(300)) AS comment_string, + entrez_table.entrez_id AS gene_entrez_id, + gloc.locations AS gene_locations, + CAST(gsa.source_id AS VARCHAR(50)) AS sequence_id, + CAST(SUBSTR(tn.name, 1, 80) AS VARCHAR(80)) AS organism, + CAST(species_name.name AS VARCHAR(60)) AS species, + LTRIM(REGEXP_REPLACE(tn.name, replace (replace (species_name.name,'[',''), ']','') ,'')) AS strain, + taxon.ncbi_tax_id, tn.taxon_id, + so.source_id as so_id, + CAST(so.name AS VARCHAR(150)) AS so_term_name, + CAST(SUBSTR(so.definition, 1, 150) AS VARCHAR(150)) AS so_term_definition, + CAST(soRls.version AS VARCHAR(7)) AS so_version, + rt2.anticodon rt2_anticodon, + ed.name AS external_db_name, + edr.version AS external_db_version, + edr.external_database_release_id AS external_db_rls_id, + CAST(gsa.chromosome AS VARCHAR(20)) AS chromosome, + gsa.sequence_type, + gsa.chromosome_order_num, gsa.na_sequence_id + FROM dots.GeneFeature gf + INNER JOIN 
apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id
+    INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id
+    INNER JOIN :ORG_ABBREVGeneLocations gloc ON gf.source_id = gloc.source_id
+    LEFT JOIN :ORG_ABBREVGeneProduct gp ON gf.source_id = gp.source_id
+    INNER JOIN sres.ExternalDatabaseRelease edr ON gf.external_database_release_id = edr.external_database_release_id
+    INNER JOIN sres.ExternalDatabase ed ON edr.external_database_id = ed.external_database_id
+    INNER JOIN :ORG_ABBREVGenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id
+    INNER JOIN sres.TaxonName tn ON gsa.taxon_id = tn.taxon_id
+    INNER JOIN sres.Taxon ON gsa.taxon_id = taxon.taxon_id
+    INNER JOIN sres.externalDatabaseRelease soRls ON so.external_database_release_id = soRls.external_database_release_id
+    INNER JOIN (
+      SELECT DISTINCT gene AS source_id FROM :ORG_ABBREVGeneId
+    ) gene ON gf.source_id = gene.source_id
+    LEFT JOIN dots.RnaType rt2 ON gf.na_feature_id = rt2.parent_id
+    LEFT JOIN :ORG_ABBREVTaxonSpecies ts ON gsa.taxon_id = ts.taxon_id -- NOTE(review): the INNER JOIN on species_name below drops rows with no ts match, so this behaves as an inner join
+    LEFT JOIN dots.geneinstance gi ON gf.na_feature_id = gi.na_feature_id
+    INNER JOIN sres.TaxonName species_name ON ts.species_taxon_id = species_name.taxon_id
+    LEFT JOIN (
+      SELECT parent_id, count(*) AS gene_transcript_count
+      FROM dots.Transcript
+      GROUP BY parent_id
+    ) transcripts ON gf.na_feature_id = transcripts.parent_id
+    LEFT JOIN (
+      SELECT parent_id, count(*) AS gene_exon_count
+      FROM dots.ExonFeature
+      GROUP BY parent_id
+    ) exons ON gf.na_feature_id = exons.parent_id
+    LEFT JOIN (
+      SELECT nfc.na_feature_id, MAX(SUBSTR(nfc.comment_string, 1, 300)) AS comment_string -- first 300 characters (start 1, length 300); one comment per feature, chosen by MAX
+      FROM dots.NaFeatureComment nfc
+      GROUP BY nfc.na_feature_id
+    ) cmnt ON gf.na_feature_id = cmnt.na_feature_id
+    LEFT JOIN (
+      SELECT distinct drnf.na_feature_id as gene_na_feature_id, 1 as is_deprecated
+      FROM dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed
+      WHERE
drnf.db_ref_id = dr.db_ref_id + AND dr.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + AND ed.name = 'gassAWB_dbxref_gene2Deprecated_RSRC' + ) deprecated ON gf.na_feature_id = deprecated.gene_na_feature_id + LEFT JOIN ( + SELECT gene_source_id, total_hts_snps, hts_nonsynonymous_snps, hts_stop_codon_snps,hts_noncoding_snps,hts_synonymous_snps, + case when (hts_nonsynonymous_snps is null) then 0 + when (hts_synonymous_snps = 0) then 0 + else round ((hts_nonsynonymous_snps/ hts_synonymous_snps), 2) end as hts_nonsyn_syn_ratio + FROM ( + select gene_source_id, + count(*) as total_hts_snps, + sum(has_nonsynonymous_allele) as hts_nonsynonymous_snps, + sum(has_stop_codon) as hts_stop_codon_snps, + sum(is_noncoding_snp) as hts_noncoding_snps, + count(*) - sum(has_nonsynonymous_allele) - sum(has_stop_codon) - sum(is_noncoding_snp) as hts_synonymous_snps + FROM :ORG_ABBREVSnpAttributes + WHERE gene_source_id is not null + GROUP by gene_source_id + ) t + ) tothtssnps ON gf.source_id = tothtssnps.gene_source_id + LEFT JOIN ( + SELECT ssg.sequence_id as gene_na_feature_id, sg.name + FROM dots.SequenceSequenceGroup ssg, + dots.sequencegroup sg, core.tableinfo ti + WHERE ssg.sequence_group_id = sg.sequence_group_id + AND ssg.source_table_id = ti.table_id + AND ti.name = 'GeneFeature' + ) orthologs ON gf.na_feature_id = orthologs.gene_na_feature_id + LEFT JOIN ( + SELECT na_feature_id, max(product) as product + FROM apidb.GeneFeatureProduct + WHERE is_preferred = 1 + GROUP BY na_feature_id + ) preferred_gene_product ON gf.na_feature_id = preferred_gene_product.na_feature_id + LEFT JOIN ( + SELECT na_feature_id, max(product) as product + FROM apidb.GeneFeatureProduct + GROUP BY na_feature_id + ) any_gene_product ON gf.na_feature_id = any_gene_product.na_feature_id + LEFT JOIN ( + SELECT na_feature_id, max(name) as name + FROM apidb.GeneFeatureName + WHERE is_preferred = 1 + GROUP BY na_feature_id + EXCEPT + 
-- suppress gene/name associations from the *DELETED_RSRC databases + SELECT gfn.na_feature_id, gfn.name + FROM apidb.GeneFeatureName gfn, + sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr + WHERE gfn.external_database_release_id = edr.external_database_release_id + AND ed.external_database_id = edr.external_database_id + AND ed.name like '%DELETED_RSRC' + ) preferred_name ON gf.na_feature_id = preferred_name.na_feature_id + LEFT JOIN ( + SELECT na_feature_id, max(name) as name + FROM apidb.GeneFeatureName + GROUP by na_feature_id + EXCEPT + -- suppress gene/name associations from the *DELETED_RSRC databases + SELECT gfn.na_feature_id, gfn.name + FROM apidb.GeneFeatureName gfn, + sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr + WHERE gfn.external_database_release_id = edr.external_database_release_id + AND ed.external_database_id = edr.external_database_id + AND ed.name like '%DELETED_RSRC' + ) any_name ON gf.na_feature_id = any_name.na_feature_id + LEFT JOIN ( + SELECT dbna.na_feature_id, + substr(string_agg(db.primary_identifier, ',' order by db.primary_identifier), 1, 300) as entrez_id + FROM sres.ExternalDatabaseRelease edr, sres.DbRef db, + dots.DbRefNaFeature dbna, sres.ExternalDatabase ed + WHERE edr.external_database_release_id = db.external_database_release_id + AND ed.external_database_id = edr.external_database_id + AND dbna.db_ref_id = db.db_ref_id + AND lower(ed.name) like '%entrez%' + GROUP BY dbna.na_feature_id + ) entrez_table ON gf.na_feature_id = entrez_table.na_feature_id + LEFT JOIN ( + SELECT drnf.na_feature_id, + substr(string_agg(dr.primary_identifier, ';' order by dr.primary_identifier), 1, 900) as old_ids + FROM dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed + WHERE dr.primary_identifier is not null + AND drnf.db_ref_id = dr.db_ref_id + AND dr.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + 
AND edr.id_type = 'previous id'
+      GROUP BY drnf.na_feature_id
+    ) olds ON gf.na_feature_id = olds.na_feature_id
+    WHERE nl.is_top_level = 1
+      AND nl.feature_type = 'GeneFeature'
+      AND (gsa.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0)
+      AND species_name.name_class = 'scientific name'
+      AND (gf.is_predicted != 1 OR gf.is_predicted is null)
+      AND tn.name_class = 'scientific name'
+      AND tn.taxon_id NOT IN (SELECT o.taxon_id FROM apidb.Organism o WHERE o.is_annotated_genome=0) -- NOTE(review): NOT IN matches nothing if the subquery ever returns a NULL taxon_id
+      AND tn.name not in ('Plasmodium gallinaceum','Plasmodium reichenowi')
+  ), transcript AS (
+    SELECT DISTINCT
+      t.parent_id as transcript_parent_id,
+      -- first the gene attributes:
+      cast(null as varchar(80)) as representative_transcript,
+      0 as gene_paralog_number, 0 as gene_ortholog_number,
+      transcript_uniprot.uniprot_id, transcript_uniprot.uniprot_id_internal,
+      -- next the transcript attributes:
+      t.source_id AS transcript_source_id,
+      tso.name as transcript_type,
+      t.na_feature_id,
+      CAST(coalesce(preferred_tx_product.product, any_tx_product.product, t.product)
+           AS VARCHAR(300))
+        AS transcript_product,
+      tl.start_min, tl.end_max,
+      tl.is_reversed, --CHECK if needed
+      CASE coalesce(tl.is_reversed, 0) WHEN 0 THEN 'forward' WHEN 1 THEN 'reverse' ELSE tl.is_reversed::varchar END AS strand,
+      coalesce(t.is_pseudo, 0) as is_pseudo, -- NULL becomes 0; a simple CASE ... WHEN null never matches because NULL = NULL is not true
+      transcript_exons.exon_count,
+      sns.length AS length, sns.na_sequence_id as spliced_na_sequence_id,
+      rt1.anticodon rt1_anticodon,
+      utr_lengths.five_prime_utr_length,
+      utr_lengths.three_prime_utr_length
+    FROM dots.Transcript t
+    LEFT JOIN dots.RnaType rt1 ON t.na_feature_id = rt1.parent_id
+    LEFT JOIN dots.SplicedNaSequence sns ON t.na_sequence_id = sns.na_sequence_id
+    INNER JOIN sres.OntologyTerm tso ON t.sequence_ontology_id = tso.ontology_term_id
+    INNER JOIN apidb.TranscriptLocation tl ON tl.feature_source_id = t.source_id
+    INNER JOIN (
+      SELECT rna_feature_id, count(*) as exon_count
+      FROM
dots.RnaFeatureExon + GROUP BY rna_feature_id + ) transcript_exons ON t.na_feature_id = transcript_exons.rna_feature_id + INNER JOIN ( + SELECT t.na_feature_id as transcript_na_feature_id, five_prime.utr_length as five_prime_utr_length, three_prime.utr_length as three_prime_utr_length + FROM dots.transcript t + LEFT JOIN ( + SELECT parent_id, sum(end_max-start_min + 1) as utr_length FROM apidb.UtrLocation + WHERE direction = 5 AND is_top_level = 1 + GROUP BY parent_id + ) five_prime ON t.na_feature_id = five_prime.parent_id + LEFT JOIN ( + SELECT parent_id, sum(end_max-start_min + 1) as utr_length FROM apidb.UtrLocation + WHERE direction = 3 AND is_top_level = 1 + GROUP BY parent_id + ) three_prime ON t.na_feature_id = three_prime.parent_id + ) utr_lengths ON t.na_feature_id = utr_lengths.transcript_na_feature_id + LEFT JOIN :ORG_ABBREVTranscriptUniprot transcript_uniprot ON t.na_feature_id = transcript_uniprot.na_feature_id + LEFT JOIN ( + SELECT na_feature_id, max(product) as product + FROM apidb.TranscriptProduct + WHERE is_preferred = 1 + GROUP BY na_feature_id + ) preferred_tx_product ON t.na_feature_id = preferred_tx_product.na_feature_id + LEFT JOIN ( + SELECT na_feature_id, max(product) as product + FROM apidb.TranscriptProduct + GROUP BY na_feature_id + ) any_tx_product ON t.na_feature_id = any_tx_product.na_feature_id + WHERE + tl.is_top_level=1 + ) + SELECT DISTINCT + genefeat.project_id, + transcript.transcript_source_id AS source_id, + -- first the gene attributes: + genefeat.gene_source_id, + genefeat.gene_na_feature_id, + genefeat.gene_start_min, + genefeat.gene_end_max, + genefeat.gene_name, + CAST(COALESCE(genefeat.old_gene_product, transcript.transcript_product, + case when transcript.is_pseudo = 1 + then 'pseudogene' + else 'unspecified product' + end + ) as VARCHAR(300) + ) as old_gene_product, + genefeat.gene_product, + genefeat.gene_type, + genefeat.gene_ebi_biotype, + genefeat.gene_id, + genefeat.gene_transcript_count, + 
genefeat.gene_exon_count, + cast(null as varchar(80)) as representative_transcript, + genefeat.gene_previous_ids, + genefeat.is_deprecated, + 0 as gene_paralog_number, 0 as gene_ortholog_number, + genefeat.gene_context_start, + genefeat.gene_context_end, + genefeat.gene_zoom_context_start, + genefeat.gene_zoom_context_end, + genefeat.orthomcl_name, + genefeat.gene_total_hts_snps, + genefeat.gene_hts_nonsynonymous_snps, + genefeat.gene_hts_stop_codon_snps, + genefeat.gene_hts_noncoding_snps, + genefeat.gene_hts_synonymous_snps, + genefeat.gene_hts_nonsyn_syn_ratio, + genefeat.comment_string, + transcript.uniprot_id, transcript.uniprot_id_internal, + genefeat.gene_entrez_id, + genefeat.gene_locations, + -- next the transcript attributes: + transcript.transcript_source_id, + transcript.transcript_type, + transcript.na_feature_id, + CAST(coalesce(transcript.transcript_product, genefeat.old_gene_product, + case when transcript.is_pseudo = 1 + then 'pseudogene' + else 'unspecified product' + end + )AS VARCHAR(300) + ) as transcript_product, + transcript.start_min, transcript.end_max, + transcript.is_reversed, --CHECK if needed + transcript.strand, + transcript.is_pseudo, + transcript.exon_count, + transcript.length, + transcript.spliced_na_sequence_id, + genefeat.sequence_id, + genefeat.organism, + genefeat.species, + genefeat.strain, + genefeat.ncbi_tax_id, genefeat.taxon_id, + genefeat.so_id, + genefeat.so_term_name, + genefeat.so_term_definition, + genefeat.so_version, + CAST(coalesce(rt1_anticodon, rt2_anticodon)AS VARCHAR(3)) AS anticodon, + genefeat.external_db_name, + genefeat.external_db_version, + genefeat.external_db_rls_id, + genefeat.chromosome, + genefeat.sequence_type, + genefeat.chromosome_order_num, genefeat.na_sequence_id, + --next the protein attributes: + pa.source_id AS protein_source_id, + pa.aa_sequence_id, + pa.cds_start as coding_start, + pa.cds_end as coding_end, + pa.cds_length, + pa.protein_length, + pa.has_seqedit, + pa.tm_count, + 
pa.molecular_weight, + pa.isoelectric_point, + pa.signalp_peptide, + pa.ec_numbers, pa.ec_numbers_derived, + pa.annotated_go_component, + pa.annotated_go_function, + pa.annotated_go_process, + pa.predicted_go_component, + pa.predicted_go_function, + pa.predicted_go_process, + pa.annotated_go_id_component, + pa.annotated_go_id_function, + pa.annotated_go_id_process, + pa.predicted_go_id_component, + pa.predicted_go_id_function, + pa.predicted_go_id_process, + transcript.five_prime_utr_length, + transcript.three_prime_utr_length + FROM genefeat + INNER JOIN transcript ON genefeat. gene_na_feature_id = transcript.transcript_parent_id + LEFT JOIN :ORG_ABBREVproteinattributes pa ON transcript.transcript_source_id = pa.transcript_source_id AND pa.rank_in_transcript = 1 + ORDER BY taxon_id, source_id + + +:DECLARE_PARTITION; + + + + UPDATE :ORG_ABBREVTranscriptAttributes ta + SET exon_count = (SELECT count(*) + 1 FROM apidb.IntronLocation il WHERE il.parent_id = ta.na_feature_id AND il.end_max - il.start_min + 1 > 10 ) + WHERE ta.project_id = 'TriTrypDB' + + ; + + + + UPDATE :ORG_ABBREVTranscriptAttributes gaup + SET gene_paralog_number = ( + SELECT count(distinct gene_source_id) + FROM :ORG_ABBREVTranscriptAttributes g1 + WHERE g1.orthomcl_name = gaup.orthomcl_name + AND g1.organism = gaup.organism + AND gaup.gene_source_id != g1.gene_source_id + ), + gene_ortholog_number = ( + SELECT count(distinct gene_source_id) + FROM :ORG_ABBREVTranscriptAttributes g1 + WHERE g1.orthomcl_name = gaup.orthomcl_name + AND g1.organism != gaup.organism + ) + WHERE (gaup.gene_type = 'protein coding' or gaup.gene_type = 'protein coding gene') + + ; + + + + UPDATE :ORG_ABBREVTranscriptAttributes + SET gene_id = gene_na_feature_id + (select coalesce(max(gene_id), 0) from dots.gene) + WHERE gene_id is null + + ; + + + + UPDATE :ORG_ABBREVTranscriptAttributes + SET representative_transcript = ( + select min(source_id) + from :ORG_ABBREVTranscriptAttributes ga + where ga.gene_source_id = 
:ORG_ABBREVTranscriptAttributes .gene_source_id + ) + WHERE representative_transcript is null + AND gene_id is not null + + ; + + + + UPDATE :ORG_ABBREVTranscriptAttributes + SET representative_transcript = source_id + WHERE representative_transcript is null + + ; + diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql new file mode 100644 index 0000000000..bcb9c3e57f --- /dev/null +++ b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql @@ -0,0 +1,135 @@ + + + CREATE UNIQUE INDEX TranscriptAttr_sourceId + ON :ORG_ABBREVTranscriptAttributes (source_id) + + + ; + + + + CREATE UNIQUE INDEX TranscriptAttr_srcPrj + ON :ORG_ABBREVTranscriptAttributes (source_id, gene_source_id, project_id) + + + ; + + + + CREATE UNIQUE INDEX TranscriptAttr_genesrc + ON :ORG_ABBREVTranscriptAttributes (gene_source_id, source_id, project_id) + + + ; + + + + CREATE UNIQUE INDEX TranscriptAttr_exon_ix + ON :ORG_ABBREVTranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id) + + + ; + + + + CREATE UNIQUE INDEX TranscriptAttr_loc_ix + ON :ORG_ABBREVTranscriptAttributes + (na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id, + is_deprecated, source_id, gene_source_id, project_id) + + + ; + + + + CREATE UNIQUE INDEX TranscriptAttr_feat_ix + ON :ORG_ABBREVTranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id) + + + ; + + + + CREATE UNIQUE INDEX TranscriptAttr_geneid_ix + ON :ORG_ABBREVTranscriptAttributes (gene_id, source_id, gene_source_id, project_id) + + + ; + + + + CREATE UNIQUE INDEX TransAttr_orthoname_ix + ON :ORG_ABBREVTranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id) + + + ; + + + + CREATE UNIQUE INDEX TransAttr_molwt_ix + ON :ORG_ABBREVTranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id) + + + ; + + + + CREATE INDEX TransAttr_ortholog_ix + ON 
:ORG_ABBREVTranscriptAttributes + (source_id, na_sequence_id, gene_start_min, gene_end_max, orthomcl_name, gene_source_id, project_id) + + + ; + + + + CREATE INDEX TransAttr_orgsrc_ix + ON :ORG_ABBREVTranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max) + + + ; + + + + CREATE INDEX TransAttr_lwrsrc_ix + ON :ORG_ABBREVTranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id) + + + ; + + + + CREATE INDEX TransAttr_species_ix + ON :ORG_ABBREVTranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id) + + + ; + + + + CREATE UNIQUE INDEX TrnscrptAttr_geneinfo + ON :ORG_ABBREVTranscriptAttributes + (gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id, + protein_source_id, na_sequence_id, length, protein_length, + five_prime_utr_length, three_prime_utr_length) + + + ; + + + + CREATE UNIQUE INDEX TranscriptAttr_genenaf + ON :ORG_ABBREVTranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id) + + + ; + + + + CREATE INDEX TransAttr_locsIds_ix + ON :ORG_ABBREVTranscriptAttributes + (na_sequence_id, start_min, end_max, is_reversed, gene_source_id, source_id, project_id) + + + ; + diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql new file mode 100644 index 0000000000..9715d5f08b --- /dev/null +++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql @@ -0,0 +1,19 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE TranscriptCenDistance AS + SELECT DISTINCT tl.feature_source_id AS transcript, + LEAST(ABS(mfl.start_min - tl.end_max), + ABS(mfl.end_max - tl.start_min)) AS centromere_distance, + tl.sequence_source_id AS genomic_sequence + FROM apidb.TranscriptLocation tl, apidb.FeatureLocation mfl, + sres.OntologyTerm so + WHERE tl.na_sequence_id = mfl.na_sequence_id + AND mfl.feature_type = 'Miscellaneous' + AND mfl.sequence_ontology_id = so.ontology_term_id + AND so.name = 'centromere' + AND 
tl.is_top_level = 1
+
+
+:DECLARE_PARTITION;
+
diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
new file mode 100644
index 0000000000..e6630dae0d
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql
@@ -0,0 +1,8 @@
+
+
+  create index GCent_loc_ix
+    on TranscriptCenDistance (genomic_sequence, centromere_distance)
+
+
+  ;
+
diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway.psql b/Model/lib/psql/webtables/MO/TranscriptPathway.psql
new file mode 100644
index 0000000000..4b1c95b668
--- /dev/null
+++ b/Model/lib/psql/webtables/MO/TranscriptPathway.psql
@@ -0,0 +1,112 @@
+:CREATE_AND_POPULATE
+
+
+  CREATE TABLE TranscriptPathway (
+    SOURCE_ID VARCHAR(80),
+    GENE_SOURCE_ID VARCHAR(80),
+    PROJECT_ID VARCHAR(20),
+    PATHWAY_SOURCE_ID VARCHAR(50),
+    PATHWAY_NAME VARCHAR(150),
+    EC_NUMBER_GENE VARCHAR(16),
+    WILDCARD_COUNT_GENE NUMERIC,
+    EC_NUMBER_PATHWAY VARCHAR(16),
+    WILDCARD_COUNT_PATHWAY NUMERIC,
+    EXACT_MATCH NUMERIC,
+    COMPLETE_EC NUMERIC,
+    PATHWAY_ID NUMERIC(12,0),
+    PATHWAY_SOURCE VARCHAR(200),
+    EXTERNAL_DATABASE_RELEASE_ID NUMERIC(10,0)
+  )
+
+
+:DECLARE_PARTITION;
+
+
+
+  DO $$ -- fills TranscriptPathway one organism per loop iteration, committing after each insert
+    DECLARE
+      idlist RECORD;
+    BEGIN
+      FOR idlist IN ( SELECT DISTINCT organism FROM GeneAttributes )
+      LOOP
+        INSERT INTO TranscriptPathway
+        WITH transcript_ec AS (
+          SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
+                 -- wildcard_count = number of '-' placeholders in the EC number (e.g. '1.2.-.-' gives 2);
+                 -- written with length/replace because regexp_count() needs PostgreSQL 15 or later
+                 length(ec.ec_number) - length(replace(ec.ec_number, '-', '')) as wildcard_count
+          FROM sres.EnzymeClass ec
+          WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM dots.AaSequenceEnzymeClass)
+          -- one output row per EnzymeClass row; nothing is aggregated, so no GROUP BY
+        ),
+        pathway_node_ec AS (
+          SELECT distinct pn.pathway_id, pn.row_id as enzyme_class_id
+          FROM sres.PathwayNode pn, sres.ontologyterm ot
+          WHERE pn.pathway_node_type_id = ot.ontology_term_id
+            AND ot.name = 'enzyme'
+            AND pn.display_label != '-.-.-.-'
+        ),
+        pathway_ec AS (
+          SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4,
+                 -- wildcard_count = number of '-' placeholders in the EC number (e.g. '1.2.-.-' gives 2);
+                 -- written with length/replace because regexp_count() needs PostgreSQL 15 or later
+                 length(ec.ec_number) - length(replace(ec.ec_number, '-', '')) as wildcard_count
+          FROM sres.EnzymeClass ec
+          WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM pathway_node_ec)
+          -- one output row per EnzymeClass row; nothing is aggregated, so no GROUP BY
+        ),
+        ec_match AS ( -- pairs transcript and pathway EC numbers whose four levels agree, treating a NULL level as matching anything
+          SELECT tec.enzyme_class_id as transcript_enzyme_class_id,
+                 pec.enzyme_class_id as pathway_enzyme_class_id,
+                 tec.wildcard_count as wildcard_count_transcript,
+                 pec.wildcard_count as wildcard_count_pathway,
+                 tec.ec_number as ec_number_transcript,
+                 pec.ec_number as ec_number_pathway
+          FROM transcript_ec tec, pathway_ec pec
+          WHERE (tec.ec_number_1 = pec.ec_number_1 or tec.ec_number_1 is null or pec.ec_number_1 is null)
+            AND (tec.ec_number_2 = pec.ec_number_2 or tec.ec_number_2 is null or pec.ec_number_2 is null)
+            AND (tec.ec_number_3 = pec.ec_number_3 or tec.ec_number_3 is null or pec.ec_number_3 is null)
+            AND (tec.ec_number_4 = pec.ec_number_4 or tec.ec_number_4 is null or pec.ec_number_4 is null)
+        )
+        SELECT DISTINCT ga.source_id
+             , ga.gene_source_id
+             , ga.project_id
+             , pa.source_id as pathway_source_id
+             , pa.name as pathway_name
+             , ec_match.ec_number_transcript as ec_number_gene
+             , ec_match.wildcard_count_transcript as wildcard_count_gene
+             , ec_match.ec_number_pathway
+             , ec_match.wildcard_count_pathway
+             , CASE WHEN ec_match.ec_number_pathway = ec_match.ec_number_transcript
+                 THEN 1
+                 ELSE 0 END as exact_match
+             , CASE WHEN ec_match.wildcard_count_pathway + ec_match.wildcard_count_transcript = 0 -- 1 only when neither EC number contains a wildcard
+                 THEN 1
+                 ELSE 0 END as complete_ec
+             , pa.pathway_id
+             , pa.pathway_source
+             , p.external_database_release_id
+        FROM PathwayAttributes pa
+           , sres.pathway p
+           , pathway_node_ec pec
+           , ec_match
+           , dots.AaSequenceEnzymeClass asec
+           , TranscriptAttributes ga
+        WHERE ga.organism = idlist.organism
+          AND pa.pathway_id = pec.pathway_id
+          AND p.pathway_id = pa.pathway_id
+
AND pec.enzyme_class_id = ec_match.pathway_enzyme_class_id + AND asec.enzyme_class_id = ec_match.transcript_enzyme_class_id + AND ga.aa_sequence_id = asec.aa_sequence_id + AND ( + (ga.orthomcl_name IS NULL AND asec.evidence_code != 'OrthoMCLDerived') + OR ga.orthomcl_name IS NOT NULL + ) + ; + commit; + END LOOP; + END; + $$ LANGUAGE PLPGSQL; + + ; + diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql new file mode 100644 index 0000000000..7587695697 --- /dev/null +++ b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql @@ -0,0 +1,18 @@ + + + create index TranscriptPath_ix + on TranscriptPathway (gene_source_id, source_id, pathway_source_id, + pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway, + ec_number_pathway, pathway_source) + + + ; + + + + create index TranscriptPathSource_ix + on TranscriptPathway (pathway_source, gene_source_id, source_id) + + + ; + diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence.psql b/Model/lib/psql/webtables/MO/TranscriptSequence.psql new file mode 100644 index 0000000000..0cc12e2ff2 --- /dev/null +++ b/Model/lib/psql/webtables/MO/TranscriptSequence.psql @@ -0,0 +1,11 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE :ORG_ABBREVTranscriptSequence AS + SELECT ta.source_id, ta.project_id, sns.sequence + FROM :ORG_ABBREVTranscriptAttributes ta, dots.SplicedNaSequence sns + WHERE ta.source_id = sns.source_id + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql new file mode 100644 index 0000000000..079e8faf28 --- /dev/null +++ b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql @@ -0,0 +1,7 @@ + + + create index XScriptSeq_ix on :ORG_ABBREVTranscriptSequence (source_id, project_id) + + + ; + diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql new file mode 100644 index 
0000000000..7f76786eb0 --- /dev/null +++ b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql @@ -0,0 +1,27 @@ +:CREATE_AND_POPULATE + + + CREATE TABLE TransmembraneDomains AS + SELECT ta.source_id as transcript_source_id + , ta.gene_source_id AS gene_source_id + , ta.project_id + , tmf.topology AS tmf_topology + , aal.start_min AS tmf_start_min + , aal.end_max AS tmf_end_max + , substr(tas.sequence, aal.end_max::INTEGER - aal.start_min::INTEGER + 1, aal.start_min::INTEGER) AS tmf_sequence + , tmf.aa_feature_id tmf_aa_feature_id + , tmf.aa_sequence_id tmf_aa_sequence_id + , tas.source_id as protein_source_id + FROM dots.aalocation aal + , transcriptattributes ta + , dots.translatedaafeature taf + , dots.translatedaasequence tas + , dots.transmembraneaafeature tmf + WHERE ta.na_feature_id = taf.na_feature_id + AND taf.aa_sequence_id = tas.aa_sequence_id + AND tas.aa_sequence_id = tmf.aa_sequence_id + and tmf.aa_feature_id = aal.aa_feature_id + + +:DECLARE_PARTITION; + diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql new file mode 100644 index 0000000000..bdefeef42d --- /dev/null +++ b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql @@ -0,0 +1,8 @@ + + + create index TransDom1_ix + on TransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology) + + + ; + From 0038dbd8ab6f98e76f6fa847e5dfdae2d9726510 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 9 May 2025 15:11:53 -0400 Subject: [PATCH 002/112] get MG to conform --- .../psql/webtables/MG/CompoundAttributes.psql | 13 +- Model/lib/psql/webtables/MG/CompoundId.psql | 23 ++-- .../psql/webtables/MG/CompoundProperties.psql | 7 +- .../psql/webtables/MG/CompoundTypeAheads.psql | 9 +- .../webtables/MG/GroupDomainAttribute.psql | 14 +-- .../lib/psql/webtables/MG/OntologyLevels.psql | 37 ++---- .../psql/webtables/MG/PathwayAttributes.psql | 21 +--- 
.../psql/webtables/MG/PathwayCompounds.psql | 13 +- Model/lib/psql/webtables/MG/PathwayNodes.psql | 111 ++++++++---------- .../psql/webtables/MG/PathwayReactions.psql | 23 ++-- 10 files changed, 97 insertions(+), 174 deletions(-) diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes.psql b/Model/lib/psql/webtables/MG/CompoundAttributes.psql index 3f69995d58..ea8207e077 100644 --- a/Model/lib/psql/webtables/MG/CompoundAttributes.psql +++ b/Model/lib/psql/webtables/MG/CompoundAttributes.psql @@ -1,6 +1,4 @@ - - - CREATE TABLE :ORG_ABBREVCompoundAttributes AS + CREATE TABLE :SCHEMA.CompoundAttributes AS SELECT p.ID , p.source_id , p.compound_name @@ -16,13 +14,8 @@ WHERE p.parent_id IS NULL AND ( p.ID = childc.parent_id OR p.ID = childc.ID ) GROUP BY p.ID, p.source_id, p.compound_name, p.definition, p.secondary_ids - - ; - - - - CREATE INDEX :ORG_ABBREVCompoundAttributes_idx ON :ORG_ABBREVCompoundAttributes (source_id) + ; - + CREATE INDEX :SCHEMA.CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id) ; diff --git a/Model/lib/psql/webtables/MG/CompoundId.psql b/Model/lib/psql/webtables/MG/CompoundId.psql index 64eafbd86a..e235b32106 100644 --- a/Model/lib/psql/webtables/MG/CompoundId.psql +++ b/Model/lib/psql/webtables/MG/CompoundId.psql @@ -1,15 +1,13 @@ - - - CREATE TABLE :ORG_ABBREVCompoundId AS + CREATE TABLE :SCHEMA.CompoundId AS SELECT source_id AS id, source_id AS compound, 'same ID' AS type, '' as source - FROM :ORG_ABBREVCompoundAttributes + FROM :SCHEMA.CompoundAttributes UNION SELECT p.source_id AS id, ca.source_id AS compound, 'child ID' AS type, '' as source - FROM :ORG_ABBREVCompoundAttributes ca, CompoundProperties p + FROM :SCHEMA.CompoundAttributes ca, CompoundProperties p WHERE ca.id = p.parent_id UNION SELECT da.accession_number AS id, p.source_id AS compound, 'KEGG' AS type, '' as source - FROM chebi.database_accession da, :ORG_ABBREVCompoundAttributes p + FROM chebi.database_accession da, :SCHEMA.CompoundAttributes p WHERE 
da.type='KEGG COMPOUND accession' AND da.compound_id = p.id UNION @@ -19,21 +17,16 @@ AND da.compound_id = c.id AND c.parent_id=p.id UNION SELECT n.name as id, ca.source_id as compound, 'name' as type, n.source - FROM :ORG_ABBREVCompoundAttributes ca, chebi.names n + FROM :SCHEMA.CompoundAttributes ca, chebi.names n WHERE ca.id = n.compound_id AND n.type = 'NAME' UNION SELECT n.name as id, ca.source_id as compound, 'synonym' as type, n.source - FROM :ORG_ABBREVCompoundAttributes ca, chebi.names n + FROM :SCHEMA.CompoundAttributes ca, chebi.names n WHERE ca.id = n.compound_id - AND n.type = 'SYNONYM' - + AND n.type = 'SYNONYM' ; - - - - CREATE INDEX :ORG_ABBREVCompoundId_idx ON :ORG_ABBREVCompoundId (id, compound) - + CREATE INDEX :SCHEMA.CompoundId_idx ON :SCHEMA.CompoundId (id, compound) ; diff --git a/Model/lib/psql/webtables/MG/CompoundProperties.psql b/Model/lib/psql/webtables/MG/CompoundProperties.psql index 7bea00b811..5faddf835f 100644 --- a/Model/lib/psql/webtables/MG/CompoundProperties.psql +++ b/Model/lib/psql/webtables/MG/CompoundProperties.psql @@ -1,6 +1,4 @@ - - - CREATE TABLE CompoundProperties AS + CREATE TABLE :SCHEMA.CompoundProperties AS SELECT c.ID, c.chebi_accession AS source_id, c.parent_id, c.name AS compound_name, substr(string_agg(cn.name, ';'), 1, 1000) AS other_names, @@ -23,7 +21,6 @@ LEFT JOIN ( SELECT parent_id, chebi_accession FROM chebi.compounds) sec ON c.ID = sec.parent_id WHERE NOT c.status in ('D', 'F') - GROUP BY c.ID, c.chebi_accession, c.parent_id, c.name, c.definition, m.mass - + GROUP BY c.ID, c.chebi_accession, c.parent_id, c.name, c.definition, m.mass ; diff --git a/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql b/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql index 244b989c5c..dfefb18f98 100644 --- a/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql +++ b/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql @@ -1,15 +1,12 @@ - - - CREATE TABLE CompoundTypeAheads AS + CREATE TABLE :SCHEMA.CompoundTypeAheads AS 
SELECT ca.source_id AS compound_id, ca.source_id || ' (' || ca.compound_name || ')' AS display - FROM CompoundAttributes ca, PathwayCompounds pc + FROM :SCHEMA.CompoundAttributes ca, :SCHEMA.PathwayCompounds pc WHERE pc.chebi_accession = ca.source_id UNION SELECT ca.source_id AS compound_id, pc.compound_source_id || ' (' || ca.compound_name || ')' AS display - FROM CompoundAttributes ca, PathwayCompounds pc + FROM :SCHEMA.CompoundAttributes ca, :SCHEMA.PathwayCompounds pc WHERE pc.chebi_accession = ca.source_id - ; diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql index 766215dcc9..b023dc3669 100644 --- a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql +++ b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql @@ -1,6 +1,4 @@ - - -CREATE TABLE GroupDomainAttribute AS +CREATE TABLE SCHEMA.GroupDomainAttribute AS ( SELECT og.group_id AS group_name, ag.descriptions FROM apidb.OrthologGroup og, @@ -10,7 +8,7 @@ FROM apidb.OrthologGroup og, FROM (SELECT group_name, accession, num_proteins, rank() OVER (PARTITION BY group_name ORDER BY num_proteins DESC) rnk FROM (SELECT group_name, accession, count(distinct full_id) AS num_proteins - FROM DomainAssignment + FROM SCHEMA.DomainAssignment GROUP BY group_name,accession ) ) @@ -21,12 +19,8 @@ FROM apidb.OrthologGroup og, ) ag WHERE og.group_id = ag.group_name ) +; - ; - - - -CREATE INDEX GroupDomainAttribute_idx ON GroupDomainAttribute (group_name) - +CREATE INDEX SCHEMA.GroupDomainAttribute_idx ON SCHEMA.GroupDomainAttribute (group_name) ; diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webtables/MG/OntologyLevels.psql index a117a4a74d..417c6657aa 100644 --- a/Model/lib/psql/webtables/MG/OntologyLevels.psql +++ b/Model/lib/psql/webtables/MG/OntologyLevels.psql @@ -1,48 +1,35 @@ - - - CREATE UNLOGGED TABLE :ORG_ABBREVIs_a_links AS + CREATE UNLOGGED TABLE :SCHEMA.Is_a_links AS SELECT subject_term_id, object_term_id 
FROM sres.OntologyRelationship rel, sres.OntologyTerm pred WHERE rel.predicate_term_id = pred.ontology_term_id AND pred.name = 'is_a' - ; - - - CREATE UNLOGGED TABLE :ORG_ABBREVRoots AS - SELECT object_term_id FROM :ORG_ABBREVis_a_links + CREATE UNLOGGED TABLE :SCHEMA.Roots AS + SELECT object_term_id FROM :SCHEMA.is_a_links EXCEPT - SELECT subject_term_id FROM :ORG_ABBREVis_a_links - + SELECT subject_term_id FROM :SCHEMA.is_a_links ; - - - CREATE TABLE :ORG_ABBREVOntologyLevels as + CREATE TABLE :SCHEMA.OntologyLevels as WITH RECURSIVE levels(ontology_term_id, depth) AS ( - SELECT object_term_id, 1 as depth FROM :ORG_ABBREVRoots + SELECT object_term_id, 1 as depth FROM :SCHEMA.Roots UNION - SELECT :ORG_ABBREVis_a_links.subject_term_id, levels.depth + 1 as depth - FROM :ORG_ABBREVIs_a_links, levels - WHERE :ORG_ABBREVis_a_links.object_term_id = levels.ontology_term_id + SELECT :SCHEMA.is_a_links.subject_term_id, levels.depth + 1 as depth + FROM :SCHEMA.Is_a_links, levels + WHERE :SCHEMA.is_a_links.object_term_id = levels.ontology_term_id ) SELECT ontology_term_id, min(depth) as min_depth, max(depth) as max_depth FROM ( SELECT ontology_term_id, depth FROM levels - WHERE ontology_term_id NOT IN (SELECT object_term_id FROM :ORG_ABBREVRoots) + WHERE ontology_term_id NOT IN (SELECT object_term_id FROM :SCHEMA.Roots) UNION - SELECT object_term_id, 0 FROM :ORG_ABBREVRoots + SELECT object_term_id, 0 FROM :SCHEMA.Roots ) t GROUP BY ontology_term_id - ; - - - create index olev_termix on :ORG_ABBREVOntologyLevels (ontology_term_id, min_depth, max_depth) - - + create index olev_termix on :SCHEMA.OntologyLevels (ontology_term_id, min_depth, max_depth) ; diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes.psql b/Model/lib/psql/webtables/MG/PathwayAttributes.psql index 445fd4bffb..12720d8b73 100644 --- a/Model/lib/psql/webtables/MG/PathwayAttributes.psql +++ b/Model/lib/psql/webtables/MG/PathwayAttributes.psql @@ -1,6 +1,4 @@ - - - CREATE TABLE PathwayAttributes as + CREATE 
TABLE :SCHEMA.PathwayAttributes as SELECT p.source_id , p.pathway_id @@ -47,22 +45,13 @@ AND source_id NOT IN('ec01100', 'ec01110', 'ec01120') -- temporarily remove MPMP from release 46 AND ed.name NOT LIKE '%MPMP%' - ; - - - CREATE UNIQUE INDEX PathAttr_sourceId_pwaySrc - ON :ORG_ABBREVPathwayAttributes (source_id, pathway_source) - - + CREATE UNIQUE INDEX :SCHEMA.PathAttr_sourceId_pwaySrc + ON :SCHEMA.PathwayAttributes (source_id, pathway_source) ; - - - create index PathAttr_ix - on PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count) - - + create index :SCHEMA.PathAttr_ix + on :SCHEMA.PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count) ; diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds.psql b/Model/lib/psql/webtables/MG/PathwayCompounds.psql index 1ca74c7a6c..a30b299c31 100644 --- a/Model/lib/psql/webtables/MG/PathwayCompounds.psql +++ b/Model/lib/psql/webtables/MG/PathwayCompounds.psql @@ -1,6 +1,4 @@ - - - CREATE TABLE PathwayCompounds AS + CREATE TABLE :SCHEMA.PathwayCompounds AS SELECT pathway_id , reaction_id @@ -78,14 +76,9 @@ AND p.EXTERNAL_DATABASE_RELEASE_ID = edr.EXTERNAL_DATABASE_RELEASE_ID AND edr.EXTERNAL_DATABASE_ID = ed.EXTERNAL_DATABASE_ID ) t2 LEFT OUTER JOIN CHEBI.COMPOUNDS c on t2.row_id = c.ID - ; - - - create index PthCmpd_id_ix - on PathwayCompounds (pathway_id, reaction_id, ext_db_name) - - + create index :SCHEMA.PthCmpd_id_ix + on :SCHEMA.PathwayCompounds (pathway_id, reaction_id, ext_db_name) ; diff --git a/Model/lib/psql/webtables/MG/PathwayNodes.psql b/Model/lib/psql/webtables/MG/PathwayNodes.psql index 82ca920c30..7b8740ca5b 100644 --- a/Model/lib/psql/webtables/MG/PathwayNodes.psql +++ b/Model/lib/psql/webtables/MG/PathwayNodes.psql @@ -1,6 +1,4 @@ - - - CREATE UNLOGGED TABLE NodesWithTypes AS + CREATE UNLOGGED TABLE :SCHEMA.NodesWithTypes AS SELECT pn.pathway_id , CASE WHEN pa.name IS NOT NULL THEN pa.name ELSE 
pn.display_label END AS display_label , pa.url @@ -17,7 +15,7 @@ , NULL AS default_structure FROM sres.pathwaynode pn INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id - LEFT JOIN PathwayAttributes pa ON pn.display_label = pa.source_id + LEFT JOIN :SCHEMA.PathwayAttributes pa ON pn.display_label = pa.source_id WHERE ot.name = 'metabolic process' UNION ALL SELECT pn.pathway_id @@ -37,7 +35,7 @@ FROM sres.pathwaynode pn INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id LEFT JOIN sres.enzymeclass ec ON pn.row_id = ec.enzyme_class_id - LEFT JOIN PathwayNodeGene tp ON pn.pathway_node_id = tp.pathway_node_id + LEFT JOIN :SCHEMA.PathwayNodeGene tp ON pn.pathway_node_id = tp.pathway_node_id WHERE ot.name = 'enzyme' GROUP BY pn.pathway_id , pn.display_label @@ -83,26 +81,20 @@ AND s.default_structure = 'Y' ) st ON c.chebi_accession = st.compound WHERE ot.name = 'molecular entity' - ; - - - CREATE UNLOGGED TABLE ReactionsWithReversibility AS + CREATE UNLOGGED TABLE :SCHEMA.ReactionsWithReversibility AS SELECT DISTINCT spr.pathway_relationship_id , tpr.is_reversible , tpr.reaction_source_id FROM sres.pathwayrelationship spr , apidb.pathwayreactionrel prr - , PathwayReactions tpr + , :SCHEMA.PathwayReactions tpr WHERE prr.pathway_relationship_id = spr.pathway_relationship_id AND tpr.reaction_id = prr.pathway_reaction_id - - ; + ; - - - CREATE UNLOGGED TABLE EnzymeEdges AS + CREATE UNLOGGED TABLE :SCHEMA.EnzymeEdges AS SELECT DISTINCT nwt.pathway_id AS pathway_id , nwt.pathway_node_id AS e_id , nwt.type @@ -113,9 +105,9 @@ , i.associated_node_id || '_' || o.node_id || '_' || rri.is_reversible || '_' || rro.is_reversible as io FROM sres.pathwayrelationship i , sres.pathwayrelationship o - , NodesWithTypes nwt - , ReactionsWithReversibility rri - , ReactionsWithReversibility rro + , :SCHEMA.NodesWithTypes nwt + , :SCHEMA.ReactionsWithReversibility rri + , :SCHEMA.ReactionsWithReversibility rro WHERE i.node_id = 
nwt.pathway_node_id AND o.associated_node_id = nwt.pathway_node_id AND i.pathway_relationship_id = rri.pathway_relationship_id @@ -123,12 +115,9 @@ AND nwt.type = 'enzyme' AND rri.is_reversible = rro.is_reversible AND rri.reaction_source_id = rro.reaction_source_id - - ; - + ; - - CREATE UNLOGGED TABLE ParentNodes AS + CREATE UNLOGGED TABLE :SCHEMA.ParentNodes AS WITH AllEnzymeEdges AS ( SELECT string_agg(io, ',' ORDER BY io) AS all_edges , e_id @@ -141,7 +130,7 @@ SELECT pathway_id , all_edges , string_agg(e_id::varchar, '_' ORDER BY e_id) AS parent - FROM AllEnzymeEdges + FROM :SCHEMA.AllEnzymeEdges GROUP BY pathway_id , all_edges HAVING COUNT (*) > 1 @@ -150,26 +139,20 @@ FROM pn , AllEnzymeEdges aee WHERE aee.all_edges = pn.all_edges - ; - - - CREATE UNLOGGED TABLE NodesWithParents AS + CREATE UNLOGGED TABLE :SCHEMA.NodesWithParents AS SELECT DISTINCT ee.e_id AS pathway_node_id , pn.parent , ee.type AS node_type , ee.pathway_id - FROM EnzymeEdges ee - , ParentNodes pn + FROM :SCHEMA.EnzymeEdges ee + , :SCHEMA.ParentNodes pn WHERE pn.pathway_id = ee.pathway_id AND ee.e_id = pn.e_id - ; - - - CREATE UNLOGGED TABLE EnzymeReactions AS + CREATE UNLOGGED TABLE :SCHEMA.EnzymeReactions AS SELECT DISTINCT pn.PATHWAY_NODE_ID node_id , pr.SOURCE_ID AS reaction_source_id FROM sres.pathwaynode pn @@ -181,28 +164,22 @@ AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID AND ot.name = 'enzyme' - AND pn.PATHWAY_NODE_TYPE_ID = ot.ONTOLOGY_TERM_ID - + AND pn.PATHWAY_NODE_TYPE_ID = ot.ONTOLOGY_TERM_ID ; - - - CREATE UNLOGGED TABLE ParentsForEdges AS + CREATE UNLOGGED TABLE :SCHEMA.ParentsForEdges AS SELECT ee.e_id , ee.m1_id , ee.ir1 , ee.m2_id , ee.ir2 , np.parent - FROM EnzymeEdges ee - , NodesWithParents np + FROM :SCHEMA.EnzymeEdges ee + , :SCHEMA.NodesWithParents np WHERE ee.e_id = np.pathway_node_id - ; - - - CREATE TABLE PathwayEdges AS + CREATE TABLE :SCHEMA.PathwayEdges AS SELECT pa.source_id , 
pa.pathway_source , rel.* @@ -211,16 +188,16 @@ , coalesce(pe.parent, ee.e_id::varchar) AS source , ee.m1_id::varchar AS target , max(ee.ir1) AS is_reversible - FROM EnzymeEdges ee - LEFT JOIN ParentsForEdges pe ON ee.e_id = pe.e_id + FROM :SCHEMA.EnzymeEdges ee + LEFT JOIN :SCHEMA.ParentsForEdges pe ON ee.e_id = pe.e_id GROUP BY ee.pathway_id, ee.m1_id, coalesce(pe.parent, ee.e_id::varchar) UNION SELECT DISTINCT ee.pathway_id , ee.m2_id::varchar AS source , coalesce(pe.parent, ee.e_id::varchar) AS target , max(ee.ir2) AS is_reversible - FROM EnzymeEdges ee - LEFT JOIN ParentsForEdges pe ON ee.e_id = pe.e_id + FROM :SCHEMA.EnzymeEdges ee + LEFT JOIN :SCHEMA.ParentsForEdges pe ON ee.e_id = pe.e_id GROUP BY ee.pathway_id, ee.m2_id, coalesce(pe.parent, ee.e_id::varchar) UNION SELECT pn1.pathway_id @@ -241,12 +218,9 @@ ) rel , PathwayAttributes pa WHERE pa.pathway_id = rel.pathway_id - ; - - - - CREATE TABLE PathwayNodes AS + + CREATE TABLE :SCHEMA.PathwayNodes AS SELECT pa.source_id , pa.pathway_source , pn.display_label @@ -280,27 +254,40 @@ , type AS node_type , ee.pathway_id , er.reaction_source_id - FROM EnzymeEdges ee - INNER JOIN EnzymeReactions er ON er.node_id = ee.e_id - LEFT JOIN NodesWithParents nwp ON ee.e_id = nwp.pathway_node_id + FROM :SCHEMA.EnzymeEdges ee + INNER JOIN :SCHEMA.EnzymeReactions er ON er.node_id = ee.e_id + LEFT JOIN :SCHEMA.NodesWithParents nwp ON ee.e_id = nwp.pathway_node_id UNION SELECT nwp.parent , NULL , 'nodeOfNodes' , pathway_id , NULL - FROM NodesWithParents nwp + FROM :SCHEMA.NodesWithParents nwp UNION SELECT nwt.pathway_node_id::varchar AS pathway_node_id , NULL AS parent , nwt.type , nwt.pathway_id , NULL - FROM NodesWithTypes nwt + FROM :SCHEMA.NodesWithTypes nwt WHERE nwt.type != 'enzyme' ) nodes_with_parents - INNER JOIN PathwayAttributes pa ON nodes_with_parents.pathway_id = pa.pathway_id - LEFT JOIN NodesWithTypes pn ON nodes_with_parents.pathway_node_id = pn.pathway_node_id::varchar - + INNER JOIN 
:SCHEMA.PathwayAttributes pa ON nodes_with_parents.pathway_id = pa.pathway_id + LEFT JOIN :SCHEMA.NodesWithTypes pn ON nodes_with_parents.pathway_node_id = pn.pathway_node_id::varchar ; +drop table :SCHEMA.NodesWithTypes; +drop table :SCHEMA.ReactionsWithReversibility; +drop table :SCHEMA.EnzymeEdges; +drop table :SCHEMA.ParentNodes; +drop table :SCHEMA.NodesWithParents; +drop table :SCHEMA.EnzymeReactions; +drop table :SCHEMA.ParentsForEdges; +drop table :SCHEMA.ParentsForEdges; + + + + + + diff --git a/Model/lib/psql/webtables/MG/PathwayReactions.psql b/Model/lib/psql/webtables/MG/PathwayReactions.psql index 29f1ecef7c..5bdde8419a 100644 --- a/Model/lib/psql/webtables/MG/PathwayReactions.psql +++ b/Model/lib/psql/webtables/MG/PathwayReactions.psql @@ -1,6 +1,4 @@ - - - CREATE TABLE PathwayReactions AS + CREATE TABLE :SCHEMA.PathwayReactions AS SELECT o.* , CASE WHEN o.expasy_url IS NOT NULL THEN '' || o.enzyme || '' ELSE o.enzyme END as expasy_html FROM ( @@ -57,8 +55,8 @@ , SRES.PATHWAYNODE pn , SRES.PATHWAYRELATIONSHIP prel , SRES.ONTOLOGYTERM ot - , PathwayCompounds pc - LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id + , :SCHEMA.PathwayCompounds pc + LEFT JOIN :SCHEMA.CompoundAttributes ca ON pc.chebi_accession = ca.source_id WHERE p.PATHWAY_ID = prr.PATHWAY_ID AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID @@ -101,7 +99,7 @@ , SRES.EXTERNALDATABASERELEASE edr , SRES.ONTOLOGYTERM ot , rep - , PathwayCompounds pc + , :SCHEMA.PathwayCompounds pc LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id WHERE p.PATHWAY_ID = prr.PATHWAY_ID AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID @@ -128,14 +126,9 @@ ) i LEFT OUTER JOIN sres.enzymeclass ec ON i.enzyme = ec.ec_number ) o - - ; - - - - create index PathRcts_id_ix - on PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name) + ; - - ; + create index :SCHEMA.PathRcts_id_ix + on 
:SCHEMA.PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name) + ; From 50e942f579a4aa3fa4976b179cd6dcda58474e50 Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Fri, 9 May 2025 17:05:49 -0400 Subject: [PATCH 003/112] rm spclosure and move genmicseq attributes --- .../webtables/??/SequencePieceClosure.psql | 48 ------------------- .../{?? => MO}/GenomicSeqAttributes.psql | 0 2 files changed, 48 deletions(-) delete mode 100644 Model/lib/psql/webtables/??/SequencePieceClosure.psql rename Model/lib/psql/webtables/{?? => MO}/GenomicSeqAttributes.psql (100%) diff --git a/Model/lib/psql/webtables/??/SequencePieceClosure.psql b/Model/lib/psql/webtables/??/SequencePieceClosure.psql deleted file mode 100644 index 3993db26d3..0000000000 --- a/Model/lib/psql/webtables/??/SequencePieceClosure.psql +++ /dev/null @@ -1,48 +0,0 @@ - - - CREATE TABLE :ORG_ABBREVSequencePieceClosure AS - SELECT sp.sequence_piece_id, - sp.virtual_na_sequence_id, - sp.piece_na_sequence_id, - sp.sequence_order, - sp.distance_from_left, - sp.uncertainty, - sp.strand_orientation, - sp.start_position, - sp.end_position, - sp.modification_date, - 1 AS edge_level - FROM dots.SequencePiece sp, dots.NaSequence ns - WHERE sp.piece_na_sequence_id = ns.na_sequence_id - AND (ns.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0) - - ; - - - - /* known issue: this should be run not just once, but iteratively - until it doesn't create new records. Currently (7/2008), - SequencePieces aren't nested even once. 
*/ - INSERT INTO :ORG_ABBREVSequencePieceClosure - (edge_level, virtual_na_sequence_id, piece_na_sequence_id, - distance_from_left, strand_orientation, modification_date, - start_position, end_position, sequence_order, sequence_piece_id) - SELECT 2, higher.virtual_na_sequence_id, lower.piece_na_sequence_id, - higher.distance_from_left, - case - when coalesce(higher.strand_orientation, '+') = coalesce(lower.strand_orientation, '+') - then '+' - else '-' - end as strand_orientation, - now(), - higher.start_position - lower.distance_from_left, - higher.end_position - lower.distance_from_left, - higher.sequence_order, - nextval('dots.sequencepiece_sq') - FROM :ORG_ABBREVSequencePieceClosure higher, :ORG_ABBREVSequencePieceClosure lower - WHERE higher.piece_na_sequence_id = lower.virtual_na_sequence_id - AND higher.start_position >= lower.start_position + lower.distance_from_left - AND higher.end_position <= lower.end_position + lower.distance_from_left - - ; - diff --git a/Model/lib/psql/webtables/??/GenomicSeqAttributes.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/??/GenomicSeqAttributes.psql rename to Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql From 758358e8306958c7758adc4c0b0f49df47625c2d Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Fri, 9 May 2025 17:07:37 -0400 Subject: [PATCH 004/112] organism tables... 
and some protein tables --- .../webtables/MO/GenomicSeqAttributes.psql | 49 ++------ .../webtables/MO/GenomicSeqAttributes_ix.psql | 23 ++++ .../psql/webtables/MO/GenomicSequenceId.psql | 5 +- .../webtables/MO/GenomicSequenceId_ix.psql | 6 +- .../webtables/MO/GenomicSequenceSequence.psql | 13 +- .../MO/GenomicSequenceSequence_ix.psql | 2 +- .../webtables/MO/OrganismAbbreviation.psql | 10 +- .../MO/OrganismAbbreviationBlast.psql | 3 - .../webtables/MO/OrganismSelectTaxonRank.psql | 5 +- .../lib/psql/webtables/MO/PdbSimilarity.psql | 11 +- .../webtables/MO/SequencePieceClosure.psql | 48 ++++++++ .../webtables/MO/SignalPeptideDomains.psql | 9 +- .../webtables/MO/SignalPeptideDomains_ix.psql | 8 +- Model/lib/psql/webtables/MO/TaxonSpecies.psql | 21 +--- Model/lib/psql/webtables/MO/Taxonomy.psql | 8 +- Model/lib/psql/webtables/MO/Taxonomy_ix.psql | 4 +- .../webtables/MO/TransmembraneDomains.psql | 12 +- .../webtables/MO/TransmembraneDomains_ix.psql | 4 +- Model/lib/xml/tuningManager/tablePruning.txt | 4 +- Model/lib/xml/tuningManager/webtables.org | 114 ++++++++++++++++++ 20 files changed, 240 insertions(+), 119 deletions(-) create mode 100644 Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql create mode 100644 Model/lib/psql/webtables/MO/SequencePieceClosure.psql create mode 100644 Model/lib/xml/tuningManager/webtables.org diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql index 6922acea92..6b964c4328 100644 --- a/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql +++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql @@ -1,8 +1,8 @@ - - - CREATE TABLE :ORG_ABBREVGenomicSeqAttributes AS +:CREATE_AND_POPULATE SELECT - cast(apidb.prefixed_project_id(tn.name, ':ORG_ABBREV') as varchar(20)) as project_id, + :PROJECT_ID as project_id, + :ORG_ABBREV as org_abbrev, + current_timestamp as modification_date, SUBSTR(sequence.source_id, 1, 60) AS source_id, sequence.a_count, sequence.c_count, 
sequence.g_count, sequence.t_count, (sequence.length @@ -22,18 +22,15 @@ SUBSTR(sequence.chromosome, 1, 20) AS chromosome, sequence.external_database_release_id, sequence.sequence_ontology_id, sequence.chromosome_order_num, so.source_id as so_id, so.name as sequence_type, - coalesce(virtualization.is_top_level, 1) as is_top_level, + 1 as is_top_level, sequence.na_sequence_id, organism.genome_source, organism.name_for_filenames, coalesce(msa.has_msa, 0) as has_msa - FROM sres.Taxon LEFT JOIN apidb.Organism ON taxon.taxon_id = organism.taxon_id, + FROM sres.Taxon LEFT JOIN apidb.Organism ON taxon_id = :TAXON_ID and taxon.taxon_id = organism.taxon_id, sres.OntologyTerm so, ( SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description, a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id FROM dots.ExternalNaSequence - UNION - SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description, - a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id - FROM dots.VirtualSequence + WHERE taxon_id = :TAXON_ID ) sequence LEFT JOIN (SELECT drns.na_sequence_id, max(dr.primary_identifier) AS genbank_accession @@ -53,10 +50,6 @@ WHERE edr.external_database_id = ed.external_database_id ) db ON sequence.external_database_release_id = db.external_database_release_id LEFT JOIN - (SELECT distinct piece_na_sequence_id, 0 as is_top_level - FROM :ORG_ABBREVSequencePieceClosure - ) virtualization ON sequence.na_sequence_id = virtualization.piece_na_sequence_id - LEFT JOIN (SELECT a_na_sequence_id as na_sequence_id, 1 as has_msa FROM apidb.Synteny syn GROUP BY a_na_sequence_id @@ -73,30 +66,6 @@ AND (sequence.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0) AND so.name IN ('random_sequence', 'chromosome', 'contig', 'supercontig','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle') ORDER 
BY organism, source_id - - ; - - - - create unique index pk_SeqAttr_ ON :ORG_ABBREVGenomicSeqAttributes (lower(source_id), project_id) - - ; - - - - create unique index SeqAttr_source_id ON :ORG_ABBREVGenomicSeqAttributes (source_id) - - ; - - - - create unique index SeqAttr_naseqid ON :ORG_ABBREVGenomicSeqAttributes (na_sequence_id) - - ; - - - - create unique index SeqAttr_taxsrc_id ON :ORG_ABBREVGenomicSeqAttributes (taxon_id, source_id) - - ; + +:DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql new file mode 100644 index 0000000000..7465dc4dbb --- /dev/null +++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql @@ -0,0 +1,23 @@ + + + create unique index :SCHEMA.:ORG_ABBREVpk_SeqAttr_ ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (lower(source_id), project_id) + + ; + + + + create unique index :SCHEMA.:ORG_ABBREVSeqAttr_source_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (source_id) + + ; + + + + create unique index :SCHEMA.:ORG_ABBREVSeqAttr_naseqid ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (na_sequence_id) + + ; + + + + create unique index :SCHEMA.:ORG_ABBREVSeqAttr_taxsrc_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (taxon_id, source_id) + + ; diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql index 3b5c7cbe91..7cf8dadc40 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql @@ -1,12 +1,10 @@ :CREATE_AND_POPULATE - - - CREATE TABLE GenomicSequenceId AS SELECT DISTINCT substr(id, 1, 60) as id, substr(sequence, 1, 60) AS sequence FROM ( SELECT ns.source_id as id, ns.source_id as sequence FROM dots.NaSequence ns, sres.OntologyTerm oterm WHERE ns.sequence_ontology_id = oterm.ontology_term_id + AND ns.taxon_id = :TAXON_ID AND oterm.name in ('random_sequence', 'contig', 'supercontig', 
'chromosome','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle','kinetoplast') UNION SELECT dr.primary_identifier AS id, ns.source_id AS sequence @@ -14,6 +12,7 @@ sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed WHERE dr.primary_identifier IS NOT NULL + AND ns.taxon_id = :TAXON_ID AND ns.na_sequence_id = drnf.na_sequence_id AND drnf.db_ref_id = dr.db_ref_id AND dr.external_database_release_id diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql index 90bc21c3d6..a1fa450bcc 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql @@ -1,20 +1,20 @@ - CREATE INDEX GenSeqId_sequence_idx ON GenomicSequenceId (sequence, id) + CREATE INDEX :SCHEMA.GenSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id) ; - CREATE INDEX GenSeqId_id_idx ON GenomicSequenceId (id, sequence) + CREATE INDEX :SCHEMA.GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence) ; - CREATE INDEX GenSeqId_lowid_idx ON GenomicSequenceId (lower(id), sequence) + CREATE INDEX :SCHEMA.GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence) ; diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql index be8a73a415..86d8919c6b 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql @@ -1,13 +1,12 @@ :CREATE_AND_POPULATE - - - CREATE TABLE GenomicSequenceSequence AS - SELECT sa.source_id, cast(apidb.project_id(tn.name) as varchar(20)) as project_id, + SELECT :PROJECT_ID as project_id, + :ORG_ABBREV as org_abbrev, + current_timestamp as modification_date, + sa.source_id, ns.sequence - FROM GenomicSeqAttributes sa, dots.NaSequence ns, sres.TaxonName tn + FROM :SCHEMA.GenomicSeqAttributes sa, dots.NaSequence ns WHERE 
sa.na_sequence_id = ns.na_sequence_id - AND ns.taxon_id = tn.taxon_id - AND tn.name_class = 'scientific name' + and sa.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql index d68fd68292..7d28f93796 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql @@ -1,6 +1,6 @@ - create index GenomicSeq_ix on GenomicSequenceSequence (source_id, project_id) + create index :SCHEMA.:ORG_ABBREVGenomicSeq_ix on :SCHEMA.:ORG_ABBREVGenomicSequenceSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql index 56714b16e6..97944361b3 100644 --- a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql +++ b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql @@ -1,15 +1,9 @@ :CREATE_AND_POPULATE - - - CREATE TABLE OrganismAbbreviation AS - SELECT organism, abbreviation - FROM ( - select tn.name as organism, o.name_for_filenames as abbreviation + select tn.name as organism, o.name_for_filenames from apidb.Organism o, sres.TaxonName tn where o.taxon_id = tn.taxon_id and tn.name_class = 'scientific name' - ) subquery1 - + and o.taxon_id = :TAXON_ID :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql index f04888b75c..f098098f71 100644 --- a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql +++ b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql @@ -1,7 +1,4 @@ :CREATE_AND_POPULATE - - - CREATE TABLE OrganismAbbreviationBlast as SELECT organism, parent, abbreviation, substr(project_id, 1, 20) as project_id FROM OrganismAbbreviationWS UNION diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql 
b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql index 326c177a8a..75792a7940 100644 --- a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql +++ b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql @@ -1,7 +1,4 @@ :CREATE_AND_POPULATE - - - CREATE TABLE OrganismSelectTaxonRank AS WITH organism_rank AS ( SELECT tn1.name as organism, o.public_abbrev, tn2.name as parent_organism, case when tn2.name = 'Oomycetes' then 'class' else r.rank end as rank @@ -9,7 +6,7 @@ WITH RECURSIVE cte AS( SELECT taxon_id input, taxon_id, rank, parent_id FROM sres.taxon - WHERE taxon_id IN (SELECT taxon_id FROM apidb.organism WHERE is_annotated_genome = 1) + WHERE taxon_id IN (SELECT taxon_id FROM apidb.organism WHERE taxon_id = :TAXON_ID and is_annotated_genome = 1) UNION SELECT cte.input, t.taxon_id, t.rank, t.parent_id FROM sres.taxon t, cte diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity.psql b/Model/lib/psql/webtables/MO/PdbSimilarity.psql index 2818d01271..7a2644ce2b 100644 --- a/Model/lib/psql/webtables/MO/PdbSimilarity.psql +++ b/Model/lib/psql/webtables/MO/PdbSimilarity.psql @@ -1,8 +1,8 @@ :CREATE_AND_POPULATE - - - CREATE TABLE PdbSimilarity AS - SELECT ta.source_id, eas.source_id AS pdb_chain, + SELECT :PROJECT_ID as project_id, + :ORG_ABBREV as org_abbrev, + current_timestamp as modification_date, + ta.source_id, eas.source_id AS pdb_chain, substr(eas.description, 1, 100) AS pdb_title, substr(eas.source_id , 1 @@ -20,11 +20,12 @@ FROM apidb.PdbSimilarity s, apiDB.ProteinDataBank eas, sres.TaxonName tn, - TranscriptAttributes ta + :SCHEMA.TranscriptAttributes ta WHERE ta.aa_sequence_id = s.aa_sequence_id AND s.pident = eas.protein_data_bank_id and tn.name_class = 'scientific name' AND eas.taxon_id = tn.taxon_id + and ta.org_abbrev = ':ORG_ABBREV' ORDER BY ta.source_id, eas.source_id diff --git a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql new file mode 100644 index 
0000000000..3993db26d3 --- /dev/null +++ b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql @@ -0,0 +1,48 @@ + + + CREATE TABLE :ORG_ABBREVSequencePieceClosure AS + SELECT sp.sequence_piece_id, + sp.virtual_na_sequence_id, + sp.piece_na_sequence_id, + sp.sequence_order, + sp.distance_from_left, + sp.uncertainty, + sp.strand_orientation, + sp.start_position, + sp.end_position, + sp.modification_date, + 1 AS edge_level + FROM dots.SequencePiece sp, dots.NaSequence ns + WHERE sp.piece_na_sequence_id = ns.na_sequence_id + AND (ns.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0) + + ; + + + + /* known issue: this should be run not just once, but iteratively + until it doesn't create new records. Currently (7/2008), + SequencePieces aren't nested even once. */ + INSERT INTO :ORG_ABBREVSequencePieceClosure + (edge_level, virtual_na_sequence_id, piece_na_sequence_id, + distance_from_left, strand_orientation, modification_date, + start_position, end_position, sequence_order, sequence_piece_id) + SELECT 2, higher.virtual_na_sequence_id, lower.piece_na_sequence_id, + higher.distance_from_left, + case + when coalesce(higher.strand_orientation, '+') = coalesce(lower.strand_orientation, '+') + then '+' + else '-' + end as strand_orientation, + now(), + higher.start_position - lower.distance_from_left, + higher.end_position - lower.distance_from_left, + higher.sequence_order, + nextval('dots.sequencepiece_sq') + FROM :ORG_ABBREVSequencePieceClosure higher, :ORG_ABBREVSequencePieceClosure lower + WHERE higher.piece_na_sequence_id = lower.virtual_na_sequence_id + AND higher.start_position >= lower.start_position + lower.distance_from_left + AND higher.end_position <= lower.end_position + lower.distance_from_left + + ; + diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql index 80e45141ca..2ce374b972 100644 --- a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql +++ 
b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql @@ -1,9 +1,9 @@ :CREATE_AND_POPULATE - - - CREATE TABLE :ORG_ABBREVSignalPeptideDomains AS SELECT - gf.source_id gene_source_id + :PROJECT_ID as project_id + , :ORG_ABBREV as org_abbrev + , current_timestamp as modification_date + , gf.source_id gene_source_id , t.source_id transcript_source_id , taf.na_feature_id , spf.aa_feature_id @@ -28,6 +28,7 @@ AND taf.aa_sequence_id = tas.aa_sequence_id AND tas.aa_sequence_id = spf.aa_sequence_id AND gf.na_feature_id = t.parent_id + AND tas.taxon_id = :TAXON_ID AND (spf.signal_probability >= .5 OR spf.signal_probability IS NULL OR ((spf.means_score + spf.maxy_score) / 2) >= .5 diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql index b305c86713..d5b2c93f6c 100644 --- a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql +++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql @@ -1,15 +1,15 @@ - CREATE INDEX SignalP1_ix - ON :ORG_ABBREVSignalPeptideDomains (aa_sequence_id) + CREATE INDEX :SCHEMA.:ORG_ABBREVSignalP1_ix + ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (aa_sequence_id) ; - CREATE INDEX SignalP2_ix - ON :ORG_ABBREVSignalPeptideDomains (gene_source_id, transcript_source_id, end_max) + CREATE INDEX :SCHEMA.:ORG_ABBREVSignalP2_ix + ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (gene_source_id, transcript_source_id, end_max) ; diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies.psql b/Model/lib/psql/webtables/MO/TaxonSpecies.psql index 7ef89cd520..79a6023d27 100644 --- a/Model/lib/psql/webtables/MO/TaxonSpecies.psql +++ b/Model/lib/psql/webtables/MO/TaxonSpecies.psql @@ -1,25 +1,10 @@ - - - CREATE UNLOGGED TABLE :ORG_ABBREVTaxonOfInterest AS - SELECT taxon_id - FROM :ORG_ABBREVGenomicSeqAttributes - UNION - SELECT ns.taxon_id - FROM dots.NaSequence ns, dots.Est - WHERE est.na_sequence_id = ns.na_sequence_id - - ; - +-- recursively walk taxon tree to find ancestor with rank 
"species" +-- Update this to select max/min level with rank species if there are multiple :CREATE_AND_POPULATE - - - CREATE TABLE :ORG_ABBREVTaxonSpecies as - -- recursively walk taxon tree to find ancestor with rank "species" - -- Update this to select max/min level with rank species if there are multiple WITH RECURSIVE cte AS ( SELECT TAXON_ID, taxon_id as parent_id, 1 as lvl FROM sres.taxon - WHERE taxon_id IN (SELECT taxon_id FROM :ORG_ABBREVTaxonofinterest) + WHERE taxon_id = :TAXON_ID UNION ALL SELECT cte.taxon_id, sub.parent_id, lvl + 1 FROM cte, sres.taxon sub diff --git a/Model/lib/psql/webtables/MO/Taxonomy.psql b/Model/lib/psql/webtables/MO/Taxonomy.psql index 249eda7162..cbc2cbc0fa 100644 --- a/Model/lib/psql/webtables/MO/Taxonomy.psql +++ b/Model/lib/psql/webtables/MO/Taxonomy.psql @@ -1,7 +1,4 @@ :CREATE_AND_POPULATE - - - CREATE TABLE Taxonomy as WITH RECURSIVE cte AS ( WITH tax AS( SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id, @@ -13,7 +10,7 @@ ) SELECT tax.*, name as organism, ARRAY[taxon_id::numeric] as path FROM tax - WHERE name IN (SELECT DISTINCT organism FROM GeneAttributes) + WHERE taxon_id = :TAXON_ID UNION SELECT tax.*, cte.organism, cte.path || tax.taxon_id as path FROM tax, cte @@ -22,7 +19,4 @@ ) SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over() as orderNum FROM (SELECT cte.* FROM cte ORDER BY path) t - - :DECLARE_PARTITION; - diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql index 84083eda42..b5e2d3fb41 100644 --- a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql +++ b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql @@ -1,7 +1,7 @@ - create index tax_ix - on Taxonomy + create index :SCHEMA.tax_ix + on :SCHEMA.Taxonomy (organism, ordernum, taxon_id, parent_id, ncbi_tax_id, name, rank) diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql index 7f76786eb0..99406037ee 100644 --- 
a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql +++ b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql @@ -1,8 +1,8 @@ :CREATE_AND_POPULATE - - - CREATE TABLE TransmembraneDomains AS - SELECT ta.source_id as transcript_source_id + SELECT :PROJECT_ID as project_id + , :ORG_ABBREV as org_abbrev + , current_timestamp as modification_date + , ta.source_id as transcript_source_id , ta.gene_source_id AS gene_source_id , ta.project_id , tmf.topology AS tmf_topology @@ -13,7 +13,7 @@ , tmf.aa_sequence_id tmf_aa_sequence_id , tas.source_id as protein_source_id FROM dots.aalocation aal - , transcriptattributes ta + , :SCHEMA.transcriptattributes ta , dots.translatedaafeature taf , dots.translatedaasequence tas , dots.transmembraneaafeature tmf @@ -21,7 +21,7 @@ AND taf.aa_sequence_id = tas.aa_sequence_id AND tas.aa_sequence_id = tmf.aa_sequence_id and tmf.aa_feature_id = aal.aa_feature_id - + and ta.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql index bdefeef42d..f6aea03de7 100644 --- a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql +++ b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql @@ -1,7 +1,7 @@ - create index TransDom1_ix - on TransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology) + create index :SCHEMA.:ORG_ABBREVTransDom1_ix + on :SCHEMA.:ORG_ABBREVTransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology) ; diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt index c966901176..6f0b461f33 100644 --- a/Model/lib/xml/tuningManager/tablePruning.txt +++ b/Model/lib/xml/tuningManager/tablePruning.txt @@ -8,7 +8,7 @@ MG ?? ?? ?? -?? +R ?? ?? ?? 
@@ -82,7 +82,7 @@ K R R K -MO +K K MG (tuning from non gus tables) MG (tuning from non gus tables) diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org new file mode 100644 index 0000000000..1265054f2e --- /dev/null +++ b/Model/lib/xml/tuningManager/webtables.org @@ -0,0 +1,114 @@ +#+title: Webtables + + +* MO Tables +- Organism + - [X] OrganismAbbreviationBlast_ix.psql + - [X] OrganismAbbreviationBlast.psql + - move to KEEP + - [X] OrganismAbbreviation_ix.psql + - [X] OrganismAbbreviation.psql + - updated the abbreviation field to name_for_filenames + - [X] OrganismSelectTaxonRank_ix.psql + - [X] OrganismSelectTaxonRank.psql + - [X] Taxonomy_ix.psql + - [X] Taxonomy.psql + - [X] TaxonSpecies_ix.psql + - [X] TaxonSpecies.psql +- Genomic Sequence + - [X] GenomicSequenceId_ix.psql + - [X] GenomicSequenceId.psql + - [X] GenomicSequenceSequence_ix.psql + - [X] GenomicSequenceSequence.psql + - [X] SequencePieceClosure + - [X] GenomicSeqAttributes + +- Transcript / Protein + - [X] SignalPeptideDomains_ix.psql + - [X] SignalPeptideDomains.psql + - [X] TransmembraneDomains_ix.psql + - [X] TransmembraneDomains.psql + - [X] PdbSimilarity_ix.psql + - [X] PdbSimilarity.psql + - [ ] ProteinSequence_ix.psql + - [ ] ProteinSequence.psql + - [ ] ProteinAttributes_ix.psql + - [ ] ProteinAttributes.psql + - [ ] TranscriptAttributes_ix.psql + - [ ] TranscriptAttributes.psql + - [ ] CodingSequence_ix.psql + - [ ] CodingSequence.psql + - [ ] IntronUtrCoords_ix.psql + - [ ] IntronUtrCoords.psql + - [ ] TranscriptCenDistance_ix.psql + - [ ] TranscriptCenDistance.psql + - [ ] TranscriptPathway_ix.psql + - [ ] TranscriptPathway.psql + - [ ] TranscriptSequence_ix.psql + - [ ] TranscriptSequence.psql + - [ ] ChIPchipTranscript_ix.psql + - [ ] ChIPchipTranscript.psql + +- Gene + - [ ] GeneId_ix.psql + - [ ] GeneId.psql + - [ ] GeneAttributes_ix.psql + - [ ] GeneAttributes.psql + - [ ] GeneCopyNumbers_ix.psql + - [ ] GeneCopyNumbers.psql + - [ ] 
GeneGoTable_ix.psql + - [ ] GeneGoTable.psql + - [ ] GeneGoTerms_ix.psql + - [ ] GeneGoTerms.psql + - [ ] GeneLocations_ix.psql + - [ ] GeneLocations.psql + - [ ] GeneModelDump_ix.psql + - [ ] GeneModelDump.psql + - [ ] GeneSummaryFilter_ix.psql + - [ ] GeneSummaryFilter.psql + - [ ] TFBSGene_ix.psql + - [ ] TFBSGene.psql + - [ ] PathwayNodeGene_ix.psql + - [ ] PathwayNodeGene.psql + - [ ] PathwaysGeneTable_ix.psql + - [ ] PathwaysGeneTable.psql + - [ ] GoTermSummary_ix.psql + - [ ] GoTermSummary.psql + - [ ] EqtlSpan_ix.psql + - [ ] EqtlSpan.psql + +- EST + - [ ] EstAlignmentGeneSummary_ix.psql + - [ ] EstAlignmentGeneSummary.psql + - [ ] EstAttributes_ix.psql + - [ ] EstAttributes.psql + - [ ] EstSequence_ix.psql + - [ ] EstSequence.psql + +- Dataset / Other + - [ ] DatasetExampleSourceId_ix.psql + - [ ] DatasetExampleSourceId.psql + - [ ] Profile_ix.psql + - [ ] Profile.psql + - [ ] ProfileSamples_ix.psql + - [ ] ProfileSamples.psql + - [ ] ProfileType_ix.psql + - [ ] ProfileType.psql + - [ ] RnaSeqStats_ix.psql + - [ ] RnaSeqStats.psql + - [ ] OrganismAttributes_ix.psql + - [ ] OrganismAttributes.psql + - [ ] ChrCopyNumbers_ix.psql + - [ ] ChrCopyNumbers.psql + +- Junctions (Kathryn) + - [ ] IntronSupportLevel_ix.psql + - [ ] IntronSupportLevel.psql + - [ ] GeneIntJuncStats_ix.psql + - [ ] GeneIntJuncStats.psql + - [ ] GeneIntronJunction_ix.psql + - [ ] GeneIntronJunction.psql + - [ ] NameMappingGIJ_ix.psql + - [ ] NameMappingGIJ.psql + - [ ] GeneMaxIntronGIJ_ix.psql + - [ ] GeneMaxIntronGIJ.psql From 9a3a48f32e041da4d7bd1f2fb40f902364f11da8 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 9 May 2025 21:18:48 -0400 Subject: [PATCH 005/112] break out indexes into _ix files --- Model/lib/psql/webtables/MG/CompoundAttributes.psql | 2 -- Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql | 2 ++ Model/lib/psql/webtables/MG/CompoundId.psql | 2 -- Model/lib/psql/webtables/MG/CompoundId_ix.psql | 2 ++ Model/lib/psql/webtables/MG/GroupDomainAttribute.psql 
| 2 -- Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql | 2 ++ Model/lib/psql/webtables/MG/OntologyLevels.psql | 2 -- Model/lib/psql/webtables/MG/OntologyLevels_ix.psql | 2 ++ Model/lib/psql/webtables/MG/PathwayAttributes.psql | 7 ------- Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql | 7 +++++++ Model/lib/psql/webtables/MG/PathwayCompounds.psql | 3 --- Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql | 3 +++ Model/lib/psql/webtables/MG/PathwayReactions.psql | 3 --- Model/lib/psql/webtables/MG/PathwayReactions_ix.psql | 3 +++ 14 files changed, 21 insertions(+), 21 deletions(-) create mode 100644 Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql create mode 100644 Model/lib/psql/webtables/MG/CompoundId_ix.psql create mode 100644 Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql create mode 100644 Model/lib/psql/webtables/MG/OntologyLevels_ix.psql create mode 100644 Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql create mode 100644 Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql create mode 100644 Model/lib/psql/webtables/MG/PathwayReactions_ix.psql diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes.psql b/Model/lib/psql/webtables/MG/CompoundAttributes.psql index ea8207e077..22f54648e9 100644 --- a/Model/lib/psql/webtables/MG/CompoundAttributes.psql +++ b/Model/lib/psql/webtables/MG/CompoundAttributes.psql @@ -16,6 +16,4 @@ GROUP BY p.ID, p.source_id, p.compound_name, p.definition, p.secondary_ids ; - CREATE INDEX :SCHEMA.CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id) - ; diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql b/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql new file mode 100644 index 0000000000..a16c042e5b --- /dev/null +++ b/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql @@ -0,0 +1,2 @@ + CREATE INDEX :SCHEMA.CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id) + ; diff --git a/Model/lib/psql/webtables/MG/CompoundId.psql 
b/Model/lib/psql/webtables/MG/CompoundId.psql index e235b32106..ca4a9c9c60 100644 --- a/Model/lib/psql/webtables/MG/CompoundId.psql +++ b/Model/lib/psql/webtables/MG/CompoundId.psql @@ -27,6 +27,4 @@ AND n.type = 'SYNONYM' ; - CREATE INDEX :SCHEMA.CompoundId_idx ON :SCHEMA.CompoundId (id, compound) - ; diff --git a/Model/lib/psql/webtables/MG/CompoundId_ix.psql b/Model/lib/psql/webtables/MG/CompoundId_ix.psql new file mode 100644 index 0000000000..217b020bac --- /dev/null +++ b/Model/lib/psql/webtables/MG/CompoundId_ix.psql @@ -0,0 +1,2 @@ + CREATE INDEX :SCHEMA.CompoundId_idx ON :SCHEMA.CompoundId (id, compound) + ; diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql index b023dc3669..e9f869535b 100644 --- a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql +++ b/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql @@ -21,6 +21,4 @@ WHERE og.group_id = ag.group_name ) ; -CREATE INDEX SCHEMA.GroupDomainAttribute_idx ON SCHEMA.GroupDomainAttribute (group_name) - ; diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql b/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql new file mode 100644 index 0000000000..4112a31ce3 --- /dev/null +++ b/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql @@ -0,0 +1,2 @@ +CREATE INDEX SCHEMA.GroupDomainAttribute_idx ON SCHEMA.GroupDomainAttribute (group_name) + ; diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webtables/MG/OntologyLevels.psql index 417c6657aa..243ab0c1f1 100644 --- a/Model/lib/psql/webtables/MG/OntologyLevels.psql +++ b/Model/lib/psql/webtables/MG/OntologyLevels.psql @@ -30,6 +30,4 @@ GROUP BY ontology_term_id ; - create index olev_termix on :SCHEMA.OntologyLevels (ontology_term_id, min_depth, max_depth) - ; diff --git a/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql b/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql new file mode 100644 index 0000000000..708dc47e5a --- /dev/null 
+++ b/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql @@ -0,0 +1,2 @@ + create index :SCHEMA.olev_termix on :SCHEMA.OntologyLevels (ontology_term_id, min_depth, max_depth) + ; diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes.psql b/Model/lib/psql/webtables/MG/PathwayAttributes.psql index 12720d8b73..e976027524 100644 --- a/Model/lib/psql/webtables/MG/PathwayAttributes.psql +++ b/Model/lib/psql/webtables/MG/PathwayAttributes.psql @@ -47,11 +47,4 @@ AND ed.name NOT LIKE '%MPMP%' ; - CREATE UNIQUE INDEX :SCHEMA.PathAttr_sourceId_pwaySrc - ON :SCHEMA.PathwayAttributes (source_id, pathway_source) - ; - - create index :SCHEMA.PathAttr_ix - on :SCHEMA.PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count) - ; diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql b/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql new file mode 100644 index 0000000000..99f50c5a08 --- /dev/null +++ b/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql @@ -0,0 +1,7 @@ + CREATE UNIQUE INDEX :SCHEMA.PathAttr_sourceId_pwaySrc + ON :SCHEMA.PathwayAttributes (source_id, pathway_source) + ; + + create index :SCHEMA.PathAttr_ix + on :SCHEMA.PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count) + ; diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds.psql b/Model/lib/psql/webtables/MG/PathwayCompounds.psql index a30b299c31..68fcc82904 100644 --- a/Model/lib/psql/webtables/MG/PathwayCompounds.psql +++ b/Model/lib/psql/webtables/MG/PathwayCompounds.psql @@ -78,7 +78,4 @@ ) t2 LEFT OUTER JOIN CHEBI.COMPOUNDS c on t2.row_id = c.ID ; - create index :SCHEMA.PthCmpd_id_ix - on :SCHEMA.PathwayCompounds (pathway_id, reaction_id, ext_db_name) - ; diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql b/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql new file mode 100644 index 0000000000..fbcdfa72e1 --- /dev/null +++ 
b/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql @@ -0,0 +1,3 @@ + create index :SCHEMA.PthCmpd_id_ix + on :SCHEMA.PathwayCompounds (pathway_id, reaction_id, ext_db_name) + ; diff --git a/Model/lib/psql/webtables/MG/PathwayReactions.psql b/Model/lib/psql/webtables/MG/PathwayReactions.psql index 5bdde8419a..5787e17267 100644 --- a/Model/lib/psql/webtables/MG/PathwayReactions.psql +++ b/Model/lib/psql/webtables/MG/PathwayReactions.psql @@ -128,7 +128,4 @@ ) o ; - create index :SCHEMA.PathRcts_id_ix - on :SCHEMA.PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name) - ; diff --git a/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql b/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql new file mode 100644 index 0000000000..e0ed73978b --- /dev/null +++ b/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql @@ -0,0 +1,3 @@ + create index :SCHEMA.PathRcts_id_ix + on :SCHEMA.PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name) + ; From fcfb72559b1bd64285c25dbbbaa23d422a36f5b9 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Mon, 12 May 2025 12:16:09 -0400 Subject: [PATCH 006/112] add macros and fix index files --- .../lib/psql/webtables/MG/OntologyLevels.psql | 3 +- .../webtables/MO/ChIPchipTranscript_ix.psql | 6 +- .../psql/webtables/MO/ChrCopyNumbers_ix.psql | 15 +-- .../psql/webtables/MO/CodingSequence_ix.psql | 6 +- Model/lib/psql/webtables/MO/EqtlSpan_ix.psql | 8 +- .../webtables/MO/EstAlignmentGeneSummary.psql | 2 + .../lib/psql/webtables/MO/EstSequence_ix.psql | 4 - .../psql/webtables/MO/GeneAttributes_ix.psql | 85 +++---------- .../psql/webtables/MO/GeneCopyNumbers_ix.psql | 8 +- .../lib/psql/webtables/MO/GeneGoTable_ix.psql | 6 +- .../lib/psql/webtables/MO/GeneGoTerms_ix.psql | 6 +- Model/lib/psql/webtables/MO/GeneId_ix.psql | 30 +---- .../webtables/MO/GeneIntJuncStats_ix.psql | 5 +- .../webtables/MO/GeneIntronJunction_ix.psql | 18 +-- .../psql/webtables/MO/GeneLocations_ix.psql | 8 +- 
.../webtables/MO/GeneMaxIntronGIJ_ix.psql | 5 +- .../psql/webtables/MO/GeneModelDump_ix.psql | 8 +- .../webtables/MO/GenomicSeqAttributes_ix.psql | 12 -- .../psql/webtables/MO/GenomicSequenceId.psql | 5 +- .../webtables/MO/GenomicSequenceId_ix.psql | 18 +-- .../MO/GenomicSequenceSequence_ix.psql | 4 - .../psql/webtables/MO/GoTermSummary_ix.psql | 12 +- .../psql/webtables/MO/IntronUtrCoords_ix.psql | 16 +-- .../psql/webtables/MO/NameMappingGIJ_ix.psql | 5 +- .../webtables/MO/OrganismAbbreviation.psql | 5 +- .../MO/OrganismAbbreviationBlast.psql | 11 +- .../psql/webtables/MO/OrganismAttributes.psql | 35 +++-- .../webtables/MO/OrganismAttributes_ix.psql | 6 +- .../webtables/MO/OrganismSelectTaxonRank.psql | 5 +- .../webtables/MO/PathwaysGeneTable_ix.psql | 5 +- .../psql/webtables/MO/ProfileSamples_ix.psql | 16 +-- Model/lib/psql/webtables/MO/Profile_ix.psql | 24 +--- .../psql/webtables/MO/ProteinAttributes.psql | 12 +- .../webtables/MO/ProteinAttributes_ix.psql | 12 +- .../psql/webtables/MO/ProteinSequence_ix.psql | 6 +- .../webtables/MO/SequencePieceClosure.psql | 2 - .../webtables/MO/SignalPeptideDomains_ix.psql | 8 -- Model/lib/psql/webtables/MO/TFBSGene_ix.psql | 12 +- Model/lib/psql/webtables/MO/TaxonSpecies.psql | 5 +- Model/lib/psql/webtables/MO/Taxonomy.psql | 3 +- Model/lib/psql/webtables/MO/Taxonomy_ix.psql | 8 +- .../webtables/MO/TranscriptAttributes.psql | 1 + .../webtables/MO/TranscriptAttributes_ix.psql | 120 ++++-------------- .../MO/TranscriptCenDistance_ix.psql | 8 +- .../webtables/MO/TranscriptPathway_ix.psql | 26 ++-- .../webtables/MO/TranscriptSequence_ix.psql | 6 +- .../webtables/MO/TransmembraneDomains_ix.psql | 4 - 47 files changed, 174 insertions(+), 461 deletions(-) diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webtables/MG/OntologyLevels.psql index 243ab0c1f1..47df6e98e2 100644 --- a/Model/lib/psql/webtables/MG/OntologyLevels.psql +++ b/Model/lib/psql/webtables/MG/OntologyLevels.psql @@ -30,4 +30,5 @@ GROUP BY 
ontology_term_id ; - +drop table :SCHEMA.Is_a_links; +drop table :SCHEMA.Roots; diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql index 44e43c5b10..cfe11ee5c6 100644 --- a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql +++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql @@ -1,7 +1,3 @@ - - - create index chpgene_geneid_idx ON ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id) - - + create index :SCHEMA.:ORG_ABBREVchpgene_geneid_idx ON :SCHEMA.:ORG_ABBREVChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id) ; diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql index ab77efc977..d2d0448a29 100644 --- a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql +++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql @@ -1,16 +1,9 @@ - - - CREATE INDEX ChrCN_ix - ON ChrCopyNumbers (input_pan_id, na_sequence_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVChrCN_ix + ON :SCHEMA.:ORG_ABBREVChrCopyNumbers (input_pan_id, na_sequence_id) ; - - CREATE INDEX ChrCN_output - ON ChrCopyNumbers (output_pan_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVChrCN_output + ON :SCHEMA.:ORG_ABBREVChrCopyNumbers (output_pan_id) ; diff --git a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql index 26d8f1c327..725030a983 100644 --- a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql @@ -1,7 +1,3 @@ - - - create index CodSeq_ix on CodingSequence (source_id, project_id) - - + create index :SCHEMA.:ORG_ABBREVCodSeq_ix on :SCHEMA.:ORG_ABBREVCodingSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql index ef659c7e2a..2a14698811 100644 --- a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql +++ 
b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql @@ -1,8 +1,4 @@ - - - create index eqtlSpan_ix - on eqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score) - - + create index :SCHEMA.:ORG_ABBREVeqtlSpan_ix + on :SCHEMA.:ORG_ABBREVeqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score) ; diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql index 1985445f0a..90b4788bce 100644 --- a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql +++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql @@ -66,3 +66,5 @@ :DECLARE_PARTITION; +drop table EstAlignmentGene; +drop table EstAlignmentNoGene; \ No newline at end of file diff --git a/Model/lib/psql/webtables/MO/EstSequence_ix.psql b/Model/lib/psql/webtables/MO/EstSequence_ix.psql index b7010a62aa..de699f5486 100644 --- a/Model/lib/psql/webtables/MO/EstSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/EstSequence_ix.psql @@ -1,7 +1,3 @@ - - create index EstSeq_ix on EstSequence (source_id, project_id) - - ; diff --git a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql index a63551450d..27f307d5dc 100644 --- a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql @@ -1,90 +1,47 @@ - - - CREATE UNIQUE INDEX GeneAttr_srcPrj - ON :ORG_ABBREVGeneAttributes (source_id) - - + CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_srcPrj + ON :SCHEMA.:ORG_ABBREVGeneAttributes (source_id) ; - - - CREATE INDEX GeneAttr_exon_ix - ON :ORG_ABBREVGeneAttributes (exon_count, source_id, project_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_exon_ix + ON :SCHEMA.:ORG_ABBREVGeneAttributes (exon_count, source_id, project_id) ; - - - CREATE INDEX GeneAttr_loc_ix - ON :ORG_ABBREVGeneAttributes (na_sequence_id, start_min, 
end_max, is_reversed, na_feature_id, is_deprecated) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_loc_ix + ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated) ; - - - CREATE INDEX GeneAttr_feat_ix - ON :ORG_ABBREVGeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_feat_ix + ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed) ; - - - CREATE INDEX GeneAttr_orthoname_ix ON :ORG_ABBREVGeneAttributes ( + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_orthoname_ix ON :SCHEMA.:ORG_ABBREVGeneAttributes ( orthomcl_name, source_id, taxon_id, gene_type, na_feature_id, na_sequence_id, start_min, end_max, organism, species, product, project_id ) - ; - - - CREATE INDEX GeneAttr_ortholog_ix - ON :ORG_ABBREVGeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_ortholog_ix + ON :SCHEMA.:ORG_ABBREVGeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id) ; - - - CREATE INDEX GeneAttr_orgsrc_ix - ON :ORG_ABBREVGeneAttributes (organism, source_id, na_sequence_id, start_min, end_max) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_orgsrc_ix + ON :SCHEMA.:ORG_ABBREVGeneAttributes (organism, source_id, na_sequence_id, start_min, end_max) ; - - - CREATE INDEX GeneAttr_prjsrc_ix - ON :ORG_ABBREVGeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0)) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_prjsrc_ix + ON :SCHEMA.:ORG_ABBREVGeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0)) ; - - - CREATE INDEX GeneAttr_txid_ix - ON :ORG_ABBREVGeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_txid_ix + ON :SCHEMA.:ORG_ABBREVGeneAttributes (taxon_id, source_id, gene_type, na_feature_id, 
project_id) ; - - - CREATE INDEX GeneAttr_ids_ix - ON :ORG_ABBREVGeneAttributes (na_feature_id, source_id, project_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_ids_ix + ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_feature_id, source_id, project_id) ; - - - CREATE INDEX GeneAttr_loc_intjunc_ix - ON :ORG_ABBREVGeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_loc_intjunc_ix + ON :SCHEMA.:ORG_ABBREVGeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX) ; diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql index 7acf69d823..0108c24e21 100644 --- a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql @@ -1,8 +1,4 @@ - - - CREATE INDEX GeneCN_ix - ON GeneCopyNumbers (input_pan_id, na_sequence_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneCN_ix + ON :SCHEMA.:ORG_ABBREVGeneCopyNumbers (input_pan_id, na_sequence_id) ; diff --git a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql index de449a0e54..50f4e8b3bb 100644 --- a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql @@ -1,9 +1,5 @@ - - - create index ggtab_ix ON GeneGoTable + create index :SCHEMA.:ORG_ABBREVggtab_ix ON :SCHEMA.:ORG_ABBREVGeneGoTable (source_id, project_id, go_id, transcript_ids, is_not, go_term_name, ontology, source, evidence_code, reference, evidence_code_parameter, sort_key) - - ; diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql index 8bb63eb7e4..8d6745ff93 100644 --- a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql @@ -1,10 +1,6 @@ - - - create index ggt_ix ON :ORG_ABBREVGeneGoTerms + create index :SCHEMA.:ORG_ABBREVggt_ix ON :SCHEMA.:ORG_ABBREVGeneGoTerms (gene_source_id, transcript_source_id, ontology, go_id, 
go_term_id, go_term_name, source, evidence_code, reference, evidence_code_parameter, aa_sequence_id, is_not) - - ; diff --git a/Model/lib/psql/webtables/MO/GeneId_ix.psql b/Model/lib/psql/webtables/MO/GeneId_ix.psql index ef84acad08..55e4ca91c9 100644 --- a/Model/lib/psql/webtables/MO/GeneId_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneId_ix.psql @@ -1,35 +1,15 @@ - - - CREATE INDEX GeneId_gene_idx ON :ORG_ABBREVGeneId (gene, id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_gene_idx ON :SCHEMA.:ORG_ABBREVGeneId (gene, id) ; - - - CREATE INDEX GeneId_id_idx ON :ORG_ABBREVGeneId (id, gene) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_id_idx ON :SCHEMA.:ORG_ABBREVGeneId (id, gene) ; - - - CREATE INDEX GeneId_uniqid_idx ON :ORG_ABBREVGeneId (unique_mapping, id, gene) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_uniqid_idx ON :SCHEMA.:ORG_ABBREVGeneId (unique_mapping, id, gene) ; - - - CREATE INDEX GeneId_lowid_idx ON :ORG_ABBREVGeneId (lower(id), gene) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_lowid_idx ON :SCHEMA.:ORG_ABBREVGeneId (lower(id), gene) ; - - - CREATE INDEX GeneId_uniqlowid_idx ON :ORG_ABBREVGeneId (unique_mapping, lower(id), gene) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_uniqlowid_idx ON :SCHEMA.:ORG_ABBREVGeneId (unique_mapping, lower(id), gene) ; diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql index d2d463f8a2..063e77a1c3 100644 --- a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql @@ -1,6 +1,3 @@ - - - create index GeneIntJuncStat_ix on GeneIntJuncStats (na_sequence_id) - + create index :SCHEMA.:ORG_ABBREVGeneIntJuncStat_ix on :SCHEMA.:ORG_ABBREVGeneIntJuncStats (na_sequence_id) ; diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql index 90f98ad9cf..6e650ac67c 100644 --- a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql +++ 
b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql @@ -1,19 +1,11 @@ - - - create index gijnew_loc_ix on GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed) - + create index :SCHEMA.:ORG_ABBREVgijnew_loc_ix on :SCHEMA.:ORG_ABBREVGeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed) ; - - - - create index gijnew_gnscid_ix on GeneIntronJunction (intron_feature_id) - + + create index :SCHEMA.:ORG_ABBREVgijnew_gnscid_ix on :SCHEMA.:ORG_ABBREVGeneIntronJunction (intron_feature_id) ; - - - create index gijnew_txnloc_ix - on GeneIntronJunction + create index :SCHEMA.:ORG_ABBREVgijnew_txnloc_ix + on :SCHEMA.:ORG_ABBREVGeneIntronJunction (taxon_id, na_sequence_id, segment_start, segment_end, is_reversed, total_unique, total_isrpm, annotated_intron) diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql index 80b09a0fa2..45fdf888d5 100644 --- a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql @@ -1,8 +1,4 @@ - - - create index gloc_ix - on :ORG_ABBREVGeneLocations (source_id, locations) - - + create index :SCHEMA.:ORG_ABBREVgloc_ix + on :SCHEMA.:ORG_ABBREVGeneLocations (source_id, locations) ; diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql index 2a734e98a7..0491ee072b 100644 --- a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql @@ -1,6 +1,3 @@ - - - CREATE INDEX GnMxIntGIJ_ix on GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id) - + CREATE INDEX :SCHEMA.:ORG_ABBREVGnMxIntGIJ_ix on :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJ (gene_source_id,protocol_app_node_id) ; diff --git a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql index 89fc3c5b8e..4fa21c53fa 100644 --- a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql +++ 
b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql @@ -1,9 +1,5 @@ - - - create index gmd_ix - on GeneModelDump + create index :SCHEMA.:ORG_ABBREVgmd_ix + on :SCHEMA.:ORG_ABBREVGeneModelDump (source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids) - - ; diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql index 7465dc4dbb..e9b08230da 100644 --- a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql @@ -1,23 +1,11 @@ - - create unique index :SCHEMA.:ORG_ABBREVpk_SeqAttr_ ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (lower(source_id), project_id) - ; - - create unique index :SCHEMA.:ORG_ABBREVSeqAttr_source_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (source_id) - ; - - create unique index :SCHEMA.:ORG_ABBREVSeqAttr_naseqid ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (na_sequence_id) - ; - - create unique index :SCHEMA.:ORG_ABBREVSeqAttr_taxsrc_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (taxon_id, source_id) - ; diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql index 7cf8dadc40..ee30a1c85f 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql @@ -1,5 +1,6 @@ :CREATE_AND_POPULATE - SELECT DISTINCT substr(id, 1, 60) as id, substr(sequence, 1, 60) AS sequence + SELECT DISTINCT substr(id, 1, 60) as id, substr(sequence, 1, 60) AS sequence, + :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date FROM ( SELECT ns.source_id as id, ns.source_id as sequence FROM dots.NaSequence ns, sres.OntologyTerm oterm @@ -19,7 +20,5 @@ = edr.external_database_release_id AND edr.external_database_id = ed.external_database_id ) subquery1 - - :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql 
b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql index a1fa450bcc..c8b82387ac 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql @@ -1,21 +1,9 @@ - - - CREATE INDEX :SCHEMA.GenSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_sequence_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (sequence, id) ; - - - CREATE INDEX :SCHEMA.GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_id_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (id, sequence) ; - - - CREATE INDEX :SCHEMA.GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_lowid_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (lower(id), sequence) ; diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql index 7d28f93796..8cf38e086b 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql @@ -1,7 +1,3 @@ - - create index :SCHEMA.:ORG_ABBREVGenomicSeq_ix on :SCHEMA.:ORG_ABBREVGenomicSequenceSequence (source_id, project_id) - - ; diff --git a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql index 55b2bbf76d..aff06120f1 100644 --- a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql +++ b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql @@ -1,16 +1,8 @@ - - - create index GoTermSum_aaSeqId_idx ON :ORG_ABBREVGoTermSummary (aa_sequence_id, go_id, source) - - + create index :SCHEMA.:ORG_ABBREVGoTermSum_aaSeqId_idx ON :SCHEMA.:ORG_ABBREVGoTermSummary (aa_sequence_id, go_id, source) ; - - - create index GoTermSum_plugin_ix ON :ORG_ABBREVGoTermSummary + create index :SCHEMA.:ORG_ABBREVGoTermSum_plugin_ix ON :SCHEMA.:ORG_ABBREVGoTermSummary (ontology, gene_source_id, 
is_not, is_go_slim, go_id, go_term_name, evidence_code, evidence_category) - - ; diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql index 0d38e419d9..e844e8da5b 100644 --- a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql +++ b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql @@ -1,16 +1,8 @@ - - - CREATE INDEX iuc_srcid_ix - ON IntronUtrCoords (source_id, na_feature_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREViuc_srcid_ix + ON :SCHEMA.:ORG_ABBREVIntronUtrCoords (source_id, na_feature_id) ; - - - CREATE INDEX iuc_nfid_ix - ON IntronUtrCoords (na_feature_id, source_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREViuc_nfid_ix + ON :SCHEMA.:ORG_ABBREVIntronUtrCoords (na_feature_id, source_id) ; diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql index 025dd1f8b3..6a25756c1e 100644 --- a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql +++ b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql @@ -1,6 +1,3 @@ - - - create index namemappinggij_ix on NameMappingGIJ (junctions_pan_id,exp_pan_id) - + create index :SCHEMA.:ORG_ABBREVnamemappinggij_ix on :SCHEMA.:ORG_ABBREVNameMappingGIJ (junctions_pan_id,exp_pan_id) ; diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql index 97944361b3..5490dbfaad 100644 --- a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql +++ b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql @@ -1,6 +1,7 @@ :CREATE_AND_POPULATE - select tn.name as organism, o.name_for_filenames - from apidb.Organism o, sres.TaxonName tn + select tn.name as organism, o.name_for_filenames, + :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date + from apidb.Organism o, sres.TaxonName tn where o.taxon_id = tn.taxon_id and tn.name_class = 'scientific name' and o.taxon_id = :TAXON_ID diff --git 
a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql index f098098f71..cee5ffe550 100644 --- a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql +++ b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql @@ -1,17 +1,18 @@ :CREATE_AND_POPULATE - SELECT organism, parent, abbreviation, substr(project_id, 1, 20) as project_id + select sub.*, :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date + from ( + SELECT organism, parent, abbreviation FROM OrganismAbbreviationWS UNION -- all familes for popsets SELECT DISTINCT family_name_for_files || ' Popset/Genbank Isolates' as organism, '' as parent, - family_name_for_files as abbreviation, substr(project_name, 1, 20) as project_id + family_name_for_files as abbreviation FROM apidb.Organism WHERE family_name_for_files is not null AND abbrev || '_isolates_genbank_RSRC' IN (SELECT external_db_name as db_name FROM PopsetAttributes) AND family_name_for_files NOT IN ('Culicosporidae', 'Dubosqiidae', 'Ordosporidae') UNION - SELECT special.organism, special.parent, special.abbreviation, - substr(ot.project_id, 1, 20) as project_id + SELECT special.organism, special.parent, special.abbreviation FROM OrganismTree ot, ( -- all species and speciesAbbreviations from apidb.Organism where we have ests SELECT DISTINCT @@ -33,7 +34,7 @@ 'Cryptosporidium' as parent, 'CryptosporidiidaeReference' as abbreviation ) special WHERE special.parent = ot.term - + ) sub :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/MO/OrganismAttributes.psql index 5414446561..1a38fed293 100644 --- a/Model/lib/psql/webtables/MO/OrganismAttributes.psql +++ b/Model/lib/psql/webtables/MO/OrganismAttributes.psql @@ -1,6 +1,6 @@ - CREATE UNLOGGED TABLE DataSourceCount AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVDataSourceCount AS SELECT taxon_id, max(CASE WHEN stype = 'organellar' THEN 
num ELSE null END) as organellar_has, @@ -42,7 +42,7 @@ - CREATE UNLOGGED TABLE OrganismCentromere AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVOrganismCentromere AS SELECT distinct s.taxon_id, case when count(*) > 0 then 1 else 0 end as hasCentromere FROM DOTS.MISCELLANEOUS f @@ -57,7 +57,7 @@ - CREATE UNLOGGED TABLE SequenceCount AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVSequenceCount AS SELECT taxon_id, max(CASE WHEN sequence_type = 'contig' THEN num ELSE null END) as contig_num, @@ -75,7 +75,7 @@ - CREATE UNLOGGED TABLE CommunityCount AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVCommunityCount AS -- SELECT taxon_id, count(*) as communityCount -- TODO: addd this back select taxon_id, 0 as communityCount @@ -94,7 +94,7 @@ - CREATE UNLOGGED TABLE ProfileCount AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProfileCount AS SELECT ga.taxon_id, count(distinct(case when p.dataset_type = 'transcript_expression' and p.dataset_subtype like '%rt_pcr%' @@ -122,7 +122,7 @@ - CREATE UNLOGGED TABLE PopsetCount AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVPopsetCount AS SELECT count(distinct gene.source_id) as popsetCount, sim.taxon_id FROM ( (SELECT i.source_id, nas.taxon_id, nas.source_id as sequence_source_id @@ -154,7 +154,7 @@ - CREATE UNLOGGED TABLE GeneCount AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGeneCount AS SELECT genomestat.taxon_id, genomestat.project_id, genomestat.database_version, @@ -300,13 +300,13 @@ FROM apidb.Organism o INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id - LEFT JOIN DataSourceCount dsc ON o.taxon_id = dsc.taxon_id - LEFT JOIN OrganismCentromere oc ON o.taxon_id = oc.taxon_id - LEFT JOIN SequenceCount sc ON o.taxon_id = sc.taxon_id - LEFT JOIN CommunityCount cc ON o.taxon_id = cc.taxon_id - LEFT JOIN GeneCount gc ON o.taxon_id = gc.taxon_id - LEFT JOIN popsetCount psc ON o.taxon_id = psc.taxon_id - LEFT JOIN profileCount pc ON o.taxon_id = pc.taxon_id + LEFT JOIN 
:SCHEMA.:ORG_ABBREVDataSourceCount dsc ON o.taxon_id = dsc.taxon_id + LEFT JOIN :SCHEMA.:ORG_ABBREVOrganismCentromere oc ON o.taxon_id = oc.taxon_id + LEFT JOIN :SCHEMA.:ORG_ABBREVSequenceCount sc ON o.taxon_id = sc.taxon_id + LEFT JOIN :SCHEMA.:ORG_ABBREVCommunityCount cc ON o.taxon_id = cc.taxon_id + LEFT JOIN :SCHEMA.:ORG_ABBREVGeneCount gc ON o.taxon_id = gc.taxon_id + LEFT JOIN :SCHEMA.:ORG_ABBREVpopsetCount psc ON o.taxon_id = psc.taxon_id + LEFT JOIN :SCHEMA.:ORG_ABBREVprofileCount pc ON o.taxon_id = pc.taxon_id LEFT JOIN ( SELECT taxon_id, round(avg(length),1) as avg_transcript_length FROM TranscriptAttributes @@ -325,3 +325,10 @@ :DECLARE_PARTITION; +drop table :SCHEMA.:ORG_ABBREVDataSourceCount; +drop table :SCHEMA.:ORG_ABBREVOrganismCentromere; +drop table :SCHEMA.:ORG_ABBREVProfileCount; +drop table :SCHEMA.:ORG_ABBREVPopsetCount; +drop table :SCHEMA.:ORG_ABBREVGeneCount; +drop table :SCHEMA.:ORG_ABBREVSequenceCount; +drop table :SCHEMA.:ORG_ABBREVCommunityCount; diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql index b40126a71e..27ab99b34a 100644 --- a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql @@ -1,7 +1,3 @@ - - -create unique index Organism_sourceId_idx ON OrganismAttributes (source_id) - - +create unique index :SCHEMA.:ORG_ABBREVOrganism_sourceId_idx ON :SCHEMA.:ORG_ABBREVOrganismAttributes (source_id) ; diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql index 75792a7940..d37233b9b2 100644 --- a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql +++ b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql @@ -26,7 +26,10 @@ AND tn1.taxon_id = o.taxon_id ) SELECT - organisms.organism + :PROJECT_ID as project_id + , :ORG_ABBREV as org_abbrev + , current_timestamp as modification_date + , organisms.organism , 
organisms.public_abbrev , coalesce(phylum.parent_organism, 'N/A') as phylum , coalesce(genus.parent_organism, 'N/A') as genus diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql index 776a8f5ba5..fe2988a30e 100644 --- a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql +++ b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql @@ -1,9 +1,6 @@ - - - create index pgt_ix on PathwaysGeneTable + create index :SCHEMA.:ORG_ABBREVpgt_ix on :SCHEMA.:ORG_ABBREVPathwaysGeneTable (gene_source_id, project_id, pathway_source_id, pathway_name, reactions, enzyme, expasy_url, pathway_source, exact_match) - ; diff --git a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql index f9f068e65a..82054a5477 100644 --- a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql +++ b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql @@ -1,22 +1,14 @@ - - - create index psamp_ix - on ProfileSamples + create index :SCHEMA.:ORG_ABBREVpsamp_ix + on :SCHEMA.:ORG_ABBREVProfileSamples (dataset_name, profile_type, study_id, node_order_num, protocol_app_node_id, profile_set_suffix, study_name, node_type, protocol_app_node_name) - - ; - - - create index psampstdy_ix - on ProfileSamples + create index :SCHEMA.:ORG_ABBREVpsampstdy_ix + on :SCHEMA.:ORG_ABBREVProfileSamples (study_name, node_type, profile_type, node_order_num, protocol_app_node_id, profile_set_suffix, study_id, protocol_app_node_name, dataset_name) - - ; diff --git a/Model/lib/psql/webtables/MO/Profile_ix.psql b/Model/lib/psql/webtables/MO/Profile_ix.psql index 25c977a2e7..0839baa093 100644 --- a/Model/lib/psql/webtables/MO/Profile_ix.psql +++ b/Model/lib/psql/webtables/MO/Profile_ix.psql @@ -1,24 +1,12 @@ - - - create index exprof_idx - on Profile (source_id, profile_type, profile_set_name) - - + create index :SCHEMA.:ORG_ABBREVexprof_idx + on :SCHEMA.:ORG_ABBREVProfile (source_id, profile_type, profile_set_name) ; 
- - - create index profset_idx - on Profile (profile_set_name, profile_type) - - + create index :SCHEMA.:ORG_ABBREVprofset_idx + on :SCHEMA.:ORG_ABBREVProfile (profile_set_name, profile_type) ; - - - create index srcdset_idx - on Profile (source_id, dataset_subtype, dataset_type) - - + create index :SCHEMA.:ORG_ABBREVsrcdset_idx + on :SCHEMA.:ORG_ABBREVProfile (source_id, dataset_subtype, dataset_type) ; diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webtables/MO/ProteinAttributes.psql index de8bb59b53..b6c1dba255 100644 --- a/Model/lib/psql/webtables/MO/ProteinAttributes.psql +++ b/Model/lib/psql/webtables/MO/ProteinAttributes.psql @@ -1,6 +1,6 @@ - CREATE TABLE :ORG_ABBREVGoTermList AS + CREATE UNLOGGED TABLE :ORG_ABBREVGoTermList AS SELECT aa_sequence_id, ontology, source, string_agg(go_term_name, ';' ORDER BY go_term_name) AS go_terms, string_agg(go_id, ';' ORDER BY go_term_name) AS go_ids @@ -15,7 +15,7 @@ - CREATE TABLE :ORG_ABBREVProteinGoAttributes AS + CREATE UNLOGGED TABLE :ORG_ABBREVProteinGoAttributes AS SELECT DISTINCT gts.aa_sequence_id, substr(annotated_go_component.go_terms, 1, 300) AS annotated_go_component, substr(annotated_go_function.go_terms, 1, 300) AS annotated_go_function, @@ -85,7 +85,7 @@ - CREATE TABLE :ORG_ABBREVtProteinAttrsEc AS + CREATE UNLOGGED TABLE :ORG_ABBREVtProteinAttrsEc AS SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers FROM (SELECT DISTINCT asec.aa_sequence_id, ec.ec_number || ' (' || ec.description || ')' AS ec_number @@ -99,7 +99,7 @@ - CREATE TABLE :ORG_ABBREVtProteinAttrsEcDerived AS + CREATE UNLOGGED TABLE :ORG_ABBREVtProteinAttrsEcDerived AS SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived FROM (SELECT DISTINCT asec.aa_sequence_id, ec.ec_number || ' (' || ec.description || ')' AS ec_number @@ -205,3 +205,7 @@ ; +drop table :ORG_ABBREVGoTermList; +drop table 
:ORG_ABBREVProteinGoAttributes; +drop table :ORG_ABBREVtProteinAttrsEc; +drop table :ORG_ABBREVtProteinAttrsEcDerived; \ No newline at end of file diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql index f9899e7f80..9279d85729 100644 --- a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql @@ -1,14 +1,6 @@ - - - CREATE INDEX PA_sourceId ON :ORG_ABBREVProteinAttributes (source_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVPA_sourceId ON :SCHEMA.:ORG_ABBREVProteinAttributes (source_id) ; - - - - CREATE INDEX PA_aaSequenceId ON :ORG_ABBREVProteinAttributes (aa_sequence_id) - + CREATE INDEX :SCHEMA.:ORG_ABBREVPA_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinAttributes (aa_sequence_id) ; diff --git a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql index 98f1c06b2d..7f37d152ef 100644 --- a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql @@ -1,7 +1,3 @@ - - - create index ProtSeq_ix on ProteinSequence (source_id, project_id) - - + create index :SCHEMA.:ORG_ABBREVProtSeq_ix on :SCHEMA.:ORG_ABBREVProteinSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql index 3993db26d3..a7fc7c39b4 100644 --- a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql +++ b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql @@ -1,5 +1,3 @@ - - CREATE TABLE :ORG_ABBREVSequencePieceClosure AS SELECT sp.sequence_piece_id, sp.virtual_na_sequence_id, diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql index d5b2c93f6c..ddd24897a3 100644 --- a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql +++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql @@ -1,16 +1,8 
@@ - - CREATE INDEX :SCHEMA.:ORG_ABBREVSignalP1_ix ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (aa_sequence_id) - - ; - - CREATE INDEX :SCHEMA.:ORG_ABBREVSignalP2_ix ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (gene_source_id, transcript_source_id, end_max) - - ; diff --git a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql index c350a43aa8..7563dac0c5 100644 --- a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql +++ b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql @@ -1,14 +1,6 @@ - - - create index tfbs_geneid_idx ON TFBSGene (gene_source_id, tfbs_na_feature_id) - - + create index :SCHEMA.:ORG_ABBREVtfbs_geneid_idx ON :SCHEMA.:ORG_ABBREVTFBSGene (gene_source_id, tfbs_na_feature_id) ; - - - create index geneid_tfbs_idx ON TFBSGene (tfbs_na_feature_id,gene_source_id) - - + create index :SCHEMA.:ORG_ABBREVgeneid_tfbs_idx ON :SCHEMA.:ORG_ABBREVTFBSGene (tfbs_na_feature_id,gene_source_id) ; diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies.psql b/Model/lib/psql/webtables/MO/TaxonSpecies.psql index 79a6023d27..d0aa9578a2 100644 --- a/Model/lib/psql/webtables/MO/TaxonSpecies.psql +++ b/Model/lib/psql/webtables/MO/TaxonSpecies.psql @@ -10,7 +10,10 @@ FROM cte, sres.taxon sub WHERE cte.parent_id = sub.taxon_id ) - SELECT c.taxon_id, c.parent_id as species_taxon_id + SELECT c.taxon_id, c.parent_id as species_taxon_id, + :PROJECT_ID as project_id, + :ORG_ABBREV as org_abbrev, + current_timestamp as modification_date FROM cte c, sres.taxon t WHERE t.taxon_id = c.parent_id AND t.rank='species' diff --git a/Model/lib/psql/webtables/MO/Taxonomy.psql b/Model/lib/psql/webtables/MO/Taxonomy.psql index cbc2cbc0fa..8fe3bf229c 100644 --- a/Model/lib/psql/webtables/MO/Taxonomy.psql +++ b/Model/lib/psql/webtables/MO/Taxonomy.psql @@ -17,6 +17,7 @@ WHERE cte.parent_id = tax.taxon_id AND tax.name != 'root' ) - SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over() as orderNum + SELECT taxon_id, parent_id, ncbi_tax_id, 
name, rank, organism, row_number() over() as orderNum, + :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, current_timestamp as modification_date FROM (SELECT cte.* FROM cte ORDER BY path) t :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql index b5e2d3fb41..b3e8518849 100644 --- a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql +++ b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql @@ -1,9 +1,5 @@ - - - create index :SCHEMA.tax_ix - on :SCHEMA.Taxonomy + create index :SCHEMA.:ORG_ABBREVtax_ix + on :SCHEMA.:ORG_ABBREVTaxonomy (organism, ordernum, taxon_id, parent_id, ncbi_tax_id, name, rank) - - ; diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql index 388d722537..5ac1d5fd67 100644 --- a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql +++ b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql @@ -438,3 +438,4 @@ ; +drop table :ORG_ABBREVTranscriptUniprot \ No newline at end of file diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql index bcb9c3e57f..481959c6d7 100644 --- a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql @@ -1,135 +1,71 @@ - - - CREATE UNIQUE INDEX TranscriptAttr_sourceId + CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_sourceId ON :ORG_ABBREVTranscriptAttributes (source_id) - + ; - ; - - - - CREATE UNIQUE INDEX TranscriptAttr_srcPrj + CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_srcPrj ON :ORG_ABBREVTranscriptAttributes (source_id, gene_source_id, project_id) - - ; - - - CREATE UNIQUE INDEX TranscriptAttr_genesrc + CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_genesrc ON :ORG_ABBREVTranscriptAttributes (gene_source_id, source_id, project_id) - - ; - - - CREATE UNIQUE INDEX TranscriptAttr_exon_ix + CREATE UNIQUE INDEX 
:SCHEMA.:ORG_ABBREVTranscriptAttr_exon_ix ON :ORG_ABBREVTranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id) - - ; - - - CREATE UNIQUE INDEX TranscriptAttr_loc_ix - ON :ORG_ABBREVTranscriptAttributes + CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_loc_ix + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id, is_deprecated, source_id, gene_source_id, project_id) - - ; - - - CREATE UNIQUE INDEX TranscriptAttr_feat_ix - ON :ORG_ABBREVTranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id) - - + CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_feat_ix + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id) ; - - - CREATE UNIQUE INDEX TranscriptAttr_geneid_ix - ON :ORG_ABBREVTranscriptAttributes (gene_id, source_id, gene_source_id, project_id) - - + CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_geneid_ix + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_id, source_id, gene_source_id, project_id) ; - - - CREATE UNIQUE INDEX TransAttr_orthoname_ix - ON :ORG_ABBREVTranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id) - - + CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTransAttr_orthoname_ix + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id) ; - - - CREATE UNIQUE INDEX TransAttr_molwt_ix - ON :ORG_ABBREVTranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id) - - + CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTransAttr_molwt_ix + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id) ; - - - CREATE INDEX TransAttr_ortholog_ix - ON :ORG_ABBREVTranscriptAttributes + CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_ortholog_ix + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (source_id, na_sequence_id, 
gene_start_min, gene_end_max, orthomcl_name, gene_source_id, project_id) - - ; - - - CREATE INDEX TransAttr_orgsrc_ix - ON :ORG_ABBREVTranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_orgsrc_ix + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max) ; - - - CREATE INDEX TransAttr_lwrsrc_ix - ON :ORG_ABBREVTranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_lwrsrc_ix + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id) ; - - - CREATE INDEX TransAttr_species_ix - ON :ORG_ABBREVTranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id) - - + CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_species_ix + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id) ; - - CREATE UNIQUE INDEX TrnscrptAttr_geneinfo - ON :ORG_ABBREVTranscriptAttributes + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id, protein_source_id, na_sequence_id, length, protein_length, five_prime_utr_length, three_prime_utr_length) - - ; - - - CREATE UNIQUE INDEX TranscriptAttr_genenaf - ON :ORG_ABBREVTranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id) - - + CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_genenaf + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id) ; - - - CREATE INDEX TransAttr_locsIds_ix - ON :ORG_ABBREVTranscriptAttributes + CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_locsIds_ix + ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (na_sequence_id, start_min, end_max, is_reversed, gene_source_id, source_id, project_id) - - ; diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql 
b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql index e6630dae0d..8b77d66964 100644 --- a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql @@ -1,8 +1,4 @@ - - - create index GCent_loc_ix - on TranscriptCenDistance (genomic_sequence, centromere_distance) - - + create index :SCHEMA.:ORG_ABBREVGCent_loc_ix + on :SCHEMA.:ORG_ABBREVTranscriptCenDistance (genomic_sequence, centromere_distance) ; diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql index 7587695697..26773299a5 100644 --- a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql @@ -1,18 +1,12 @@ - - - create index TranscriptPath_ix - on TranscriptPathway (gene_source_id, source_id, pathway_source_id, - pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway, - ec_number_pathway, pathway_source) - - - ; - - - - create index TranscriptPathSource_ix - on TranscriptPathway (pathway_source, gene_source_id, source_id) - - + create index :SCHEMA.:ORG_ABBREVTranscriptPath_ix + on :SCHEMA.:ORG_ABBREVTranscriptPathway + (gene_source_id, source_id, pathway_source_id, + pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway, + ec_number_pathway, pathway_source) + ; + + create index :SCHEMA.:ORG_ABBREVTranscriptPathSource_ix + on :SCHEMA.:ORG_ABBREVTranscriptPathway (pathway_source, + gene_source_id, source_id) ; diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql index 079e8faf28..73f233d48e 100644 --- a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql @@ -1,7 +1,3 @@ - - - create index XScriptSeq_ix on :ORG_ABBREVTranscriptSequence (source_id, project_id) - - + create index :SCHEMA.:ORG_ABBREVXScriptSeq_ix on :SCHEMA.:ORG_ABBREVTranscriptSequence 
(source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql index f6aea03de7..e6d76b992a 100644 --- a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql +++ b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql @@ -1,8 +1,4 @@ - - create index :SCHEMA.:ORG_ABBREVTransDom1_ix on :SCHEMA.:ORG_ABBREVTransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology) - - ; From 37285ee751d7b1413e0f8570671f8cdb5d71ce3a Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Mon, 12 May 2025 17:19:30 -0400 Subject: [PATCH 007/112] wip --- .../psql/webtables/MO/ProteinAttributes.psql | 67 +++++---- .../psql/webtables/MO/ProteinSequence.psql | 14 +- .../webtables/MO/TranscriptAttributes.psql | 134 ++++++++++-------- Model/lib/xml/tuningManager/webtables.org | 8 +- 4 files changed, 119 insertions(+), 104 deletions(-) diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webtables/MO/ProteinAttributes.psql index b6c1dba255..ad6c7cd95b 100644 --- a/Model/lib/psql/webtables/MO/ProteinAttributes.psql +++ b/Model/lib/psql/webtables/MO/ProteinAttributes.psql @@ -1,13 +1,15 @@ - CREATE UNLOGGED TABLE :ORG_ABBREVGoTermList AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGoTermList_tmp AS SELECT aa_sequence_id, ontology, source, string_agg(go_term_name, ';' ORDER BY go_term_name) AS go_terms, string_agg(go_id, ';' ORDER BY go_term_name) AS go_ids FROM ( SELECT aa_sequence_id, ontology, CASE evidence_code WHEN 'IEA' THEN 'predicted' ELSE 'annotated' END AS source, go_term_name, go_id - FROM :ORG_ABBREVGeneGoTerms + FROM :SCHEMA.GeneGoTerms + WHERE org_abbrev = ':ORG_ABBREV' + ) t GROUP BY aa_sequence_id, ontology, source @@ -15,7 +17,7 @@ - CREATE UNLOGGED TABLE :ORG_ABBREVProteinGoAttributes AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp AS SELECT DISTINCT gts.aa_sequence_id, 
substr(annotated_go_component.go_terms, 1, 300) AS annotated_go_component, substr(annotated_go_function.go_terms, 1, 300) AS annotated_go_function, @@ -30,44 +32,44 @@ substr(predicted_go_function.go_ids, 1, 300) AS predicted_go_id_function, substr(predicted_go_process.go_ids, 1, 300) AS predicted_go_id_process FROM - (SELECT DISTINCT aa_sequence_id FROM :ORG_ABBREVGoTermSummary) gts + (SELECT DISTINCT aa_sequence_id FROM :SCHEMA.GoTermSummary where org_abbrev = ':ORG_ABBREV') gts LEFT JOIN ( - SELECT * FROM :ORG_ABBREVGoTermList + SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp WHERE source = 'annotated' AND ontology = 'Cellular Component' ) annotated_go_component ON gts.aa_sequence_id = annotated_go_component.aa_sequence_id AND 'annotated' = annotated_go_component.source AND 'Cellular Component' = annotated_go_component.ontology LEFT JOIN ( - SELECT * FROM :ORG_ABBREVGoTermList + SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp WHERE source = 'annotated' AND ontology = 'Molecular Function' ) annotated_go_function ON gts.aa_sequence_id = annotated_go_function.aa_sequence_id AND 'annotated' = annotated_go_function.source AND 'Molecular Function' = annotated_go_function.ontology LEFT JOIN ( - SELECT * FROM :ORG_ABBREVGoTermList + SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp WHERE source = 'annotated' AND ontology = 'Biological Process' ) annotated_go_process ON gts.aa_sequence_id = annotated_go_process.aa_sequence_id AND 'annotated' = annotated_go_process.source AND 'Biological Process' = annotated_go_process.ontology LEFT JOIN ( - SELECT * FROM :ORG_ABBREVGoTermList + SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp WHERE source = 'predicted' AND ontology = 'Cellular Component' ) predicted_go_component ON gts.aa_sequence_id = predicted_go_component.aa_sequence_id AND 'predicted' = predicted_go_component.source AND 'Cellular Component' = predicted_go_component.ontology LEFT JOIN ( - SELECT * FROM :ORG_ABBREVGoTermList + SELECT * FROM 
:SCHEMA.:ORG_ABBREVGoTermList_tmp WHERE source = 'predicted' AND ontology = 'Molecular Function' ) predicted_go_function ON gts.aa_sequence_id = predicted_go_function.aa_sequence_id AND 'predicted' = predicted_go_function.source AND 'Molecular Function' = predicted_go_function.ontology LEFT JOIN ( - SELECT * FROM :ORG_ABBREVGoTermList + SELECT * FROM :SCHEMA.:ORG_ABBREVGoTermList_tmp WHERE source = 'predicted' AND ontology = 'Biological Process' ) predicted_go_process ON gts.aa_sequence_id = predicted_go_process.aa_sequence_id @@ -78,19 +80,21 @@ - create index ProteinGoAttr_aaSequenceId ON :ORG_ABBREVProteinGoAttributes (aa_sequence_id) + create index :SCHEMA.:ORG_ABBREVProteinGoAttr_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id) ; - CREATE UNLOGGED TABLE :ORG_ABBREVtProteinAttrsEc AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp AS SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers FROM (SELECT DISTINCT asec.aa_sequence_id, ec.ec_number || ' (' || ec.description || ')' AS ec_number - FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec + FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec, dots.aasequence seq WHERE ec.enzyme_class_id = asec.enzyme_class_id + AND seq.aa_sequence_id = asec.aa_sequence_id + AND seq.taxon_id = :TAXON_ID AND NOT asec.evidence_code = 'OrthoMCLDerived' ) t GROUP BY aa_sequence_id @@ -99,23 +103,26 @@ - CREATE UNLOGGED TABLE :ORG_ABBREVtProteinAttrsEcDerived AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp AS SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived FROM (SELECT DISTINCT asec.aa_sequence_id, ec.ec_number || ' (' || ec.description || ')' AS ec_number - FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec + FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec, dots.aasequence seq WHERE ec.enzyme_class_id = asec.enzyme_class_id + AND 
seq.aa_sequence_id = asec.aa_sequence_id + AND seq.taxon_id = :TAXON_ID AND asec.evidence_code = 'OrthoMCLDerived' ) t GROUP BY aa_sequence_id ; -:CREATE_AND_POPULATE - - CREATE TABLE :ORG_ABBREVProteinAttributes AS - SELECT pi.name as project_id, +-- TODO: Filter the subqueries or break into tmp tables for performance +:CREATE_AND_POPULATE + SELECT ':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, + current_timestamp as modification_date, tas.source_id, tas.aa_sequence_id, t.source_id as transcript_source_id, gf.source_id as gene_source_id, @@ -150,18 +157,18 @@ row_number() over (partition by t.source_id order by tas.length desc) as rank_in_transcript, uniprot.uniprot_ids FROM - core.ProjectInfo pi - INNER JOIN dots.Transcript t ON t.row_project_id = pi.project_id + dots.Transcript t, INNER JOIN dots.GeneFeature gf ON gf.na_feature_id = t.parent_id + INNER JOIN dots.nasequence nas ON gf.na_sequence_id = nas.na_sequence_id AND nas.taxon_id = :TAXON_ID INNER JOIN dots.TranslatedAaFeature taf ON t.na_feature_id = taf.na_feature_id INNER JOIN dots.TranslatedAaSequence tas ON taf.aa_sequence_id = tas.aa_sequence_id LEFT JOIN dots.RnaType rt2 ON gf.na_feature_id = rt2.parent_id LEFT JOIN dots.RnaType rt1 ON t.na_feature_id = rt1.parent_id LEFT JOIN apidb.AaSequenceAttribute asa ON taf.aa_sequence_id = asa.aa_sequence_id - LEFT JOIN :ORG_ABBREVProteinGoAttributes go ON tas.aa_sequence_id = go.aa_sequence_id + LEFT JOIN :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp go ON tas.aa_sequence_id = go.aa_sequence_id LEFT JOIN ( SELECT aa_sequence_id, string_agg(peptide_sequence, ', ') peptide_sequence - FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :ORG_ABBREVSignalPeptideDomains) t + FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :SCHEMA.:ORG_ABBREVSignalPeptideDomains) t GROUP BY aa_sequence_id ) sigp ON tas.aa_sequence_id = sigp.aa_sequence_id LEFT JOIN ( @@ -178,8 +185,8 @@ GROUP BY tmaf.aa_sequence_id) tms GROUP BY tms.aa_sequence_id ) 
transmembrane ON tas.aa_sequence_id = transmembrane.aa_sequence_id - LEFT JOIN :ORG_ABBREVProteinAttrsEc ec ON tas.aa_sequence_id = ec.aa_sequence_id - LEFT JOIN :ORG_ABBREVProteinAttrsEcDerived ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id + LEFT JOIN :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp ec ON tas.aa_sequence_id = ec.aa_sequence_id + LEFT JOIN :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id LEFT JOIN ( SELECT af.aa_sequence_id, string_agg(dbref.primary_identifier, ',' order by dbref.primary_identifier) as uniprot_ids @@ -199,13 +206,13 @@ - update :ORG_ABBREVProteinAttributes gaup + update :SCHEMA.:ORG_ABBREVProteinAttributes gaup set has_seqedit = 1 where source_id in (select source_id from apidb.seqedit) ; -drop table :ORG_ABBREVGoTermList; -drop table :ORG_ABBREVProteinGoAttributes; -drop table :ORG_ABBREVtProteinAttrsEc; -drop table :ORG_ABBREVtProteinAttrsEcDerived; \ No newline at end of file +drop table :SCHEMA.:ORG_ABBREVGoTermList_tmp; +drop table :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp; +drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp; +drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp; diff --git a/Model/lib/psql/webtables/MO/ProteinSequence.psql b/Model/lib/psql/webtables/MO/ProteinSequence.psql index 937e695a3f..9517f9436b 100644 --- a/Model/lib/psql/webtables/MO/ProteinSequence.psql +++ b/Model/lib/psql/webtables/MO/ProteinSequence.psql @@ -1,15 +1,9 @@ :CREATE_AND_POPULATE - - - CREATE TABLE ProteinSequence AS WITH pAttr AS ( SELECT distinct source_id, aa_sequence_id - FROM ProteinAttributes) - SELECT pa.source_id, pi.name AS project_id, tas.sequence - FROM pAttr pa, dots.TranslatedAaSequence tas, core.Projectinfo pi + FROM ProteinAttributes where org_abbrev = ':ORG_ABBREV') + SELECT pa.source_id, tas.sequence, + :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, current_timestamp as modification_date + FROM pAttr pa, dots.TranslatedAaSequence tas WHERE 
pa.aa_sequence_id = tas.aa_sequence_id - AND pi.project_id = tas.row_project_id - - :DECLARE_PARTITION; - diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql index 5ac1d5fd67..069c605c63 100644 --- a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql +++ b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql @@ -1,13 +1,17 @@ - +\ - CREATE table :ORG_ABBREVTranscriptUniprot AS + CREATE table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp AS select na_feature_id, substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id, substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal FROM (SELECT DISTINCT t.na_feature_id, dr.primary_identifier as uniprot_id FROM sres.DbRef dr, dots.DbRefNaFeature x, dots.Transcript t, + dots.genefeature gf, dots.nasequence nas sres.ExternalDatabase d, sres.ExternalDatabaseRelease r WHERE dr.db_ref_id = x.DB_REF_ID + AND t.parent_id = gf.na_feature_id + and gf.na_sequence_id = nas.na_sequence_id + and nas.taxon_id = :TAXON_ID AND (x.na_feature_id = t.na_feature_id -- or x.na_feature_id = t.parent_id) AND dr.external_database_release_id = r.external_database_release_id @@ -23,12 +27,8 @@ ; :CREATE_AND_POPULATE - - - CREATE TABLE :ORG_ABBREVTranscriptAttributes AS WITH genefeat AS ( SELECT DISTINCT - cast(apidb.prefixed_project_id(tn.name, ':ORG_ABBREV') as varchar(20)) as project_id, -- first the gene attributes: gf.source_id AS gene_source_id, gf.na_feature_id AS gene_na_feature_id, @@ -50,12 +50,12 @@ GREATEST(1, least(nl.start_min, nl.end_max) - 1500) AS gene_zoom_context_start, LEAST(gsa.length, greatest(nl.start_min, nl.end_max) + 1500) AS gene_zoom_context_end, CAST(orthologs.name AS VARCHAR(60)) AS orthomcl_name, - coalesce(tothtssnps.total_hts_snps,0) AS gene_total_hts_snps, - coalesce(tothtssnps.hts_nonsynonymous_snps,0) AS gene_hts_nonsynonymous_snps, - coalesce(tothtssnps.hts_stop_codon_snps,0) AS 
gene_hts_stop_codon_snps, - coalesce(tothtssnps.hts_noncoding_snps,0) AS gene_hts_noncoding_snps, - coalesce(tothtssnps.hts_synonymous_snps,0) AS gene_hts_synonymous_snps, - coalesce(tothtssnps.hts_nonsyn_syn_ratio,0) AS gene_hts_nonsyn_syn_ratio, + -- coalesce(tothtssnps.total_hts_snps,0) AS gene_total_hts_snps, + -- coalesce(tothtssnps.hts_nonsynonymous_snps,0) AS gene_hts_nonsynonymous_snps, + -- coalesce(tothtssnps.hts_stop_codon_snps,0) AS gene_hts_stop_codon_snps, + -- coalesce(tothtssnps.hts_noncoding_snps,0) AS gene_hts_noncoding_snps, + -- coalesce(tothtssnps.hts_synonymous_snps,0) AS gene_hts_synonymous_snps, + -- coalesce(tothtssnps.hts_nonsyn_syn_ratio,0) AS gene_hts_nonsyn_syn_ratio, CAST(cmnt.comment_string AS VARCHAR(300)) AS comment_string, entrez_table.entrez_id AS gene_entrez_id, gloc.locations AS gene_locations, @@ -76,21 +76,22 @@ gsa.sequence_type, gsa.chromosome_order_num, gsa.na_sequence_id FROM dots.GeneFeature gf + INNER JOIN dots.nasequence seq ON seq.na_sequence_id = gf.na_sequence_id and nas.taxon_id = :TAXON_ID INNER JOIN apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id - INNER JOIN :ORG_ABBREVGeneLocations gloc ON gf.source_id = gloc.source_id - LEFT JOIN :ORG_ABBREVGeneProduct gp ON gf.source_id = gp.source_id + INNER JOIN :SCHEMA.GeneLocations gloc ON gf.source_id = gloc.source_id and gloc.org_abbrev = ':ORG_ABBREV' + LEFT JOIN :SCHEMA.GeneProduct gp ON gf.source_id = gp.source_id and gp.org_abbrev = ':ORG_ABBREV' INNER JOIN sres.ExternalDatabaseRelease edr ON gf.external_database_release_id = edr.external_database_release_id INNER JOIN sres.ExternalDatabase ed ON edr.external_database_id = ed.external_database_id - INNER JOIN :ORG_ABBREVGenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id + INNER JOIN :SCHEMA.GenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id and gsa.org_abbrev = ':ORG_ABBREV' INNER JOIN 
sres.TaxonName tn ON gsa.taxon_id = tn.taxon_id INNER JOIN sres.Taxon ON gsa.taxon_id = taxon.taxon_id INNER JOIN sres.externalDatabaseRelease soRls ON so.external_database_release_id = soRls.external_database_release_id INNER JOIN ( - SELECT DISTINCT gene AS source_id FROM :ORG_ABBREVGeneId + SELECT DISTINCT gene AS source_id FROM :SCHEMA.GeneId where org_abbrev = ':ORG_ABBREV' ) gene ON gf.source_id = gene.source_id LEFT JOIN dots.RnaType rt2 ON gf.na_feature_id = rt2.parent_id - LEFT JOIN :ORG_ABBREVTaxonSpecies ts ON gsa.taxon_id = ts.taxon_id + LEFT JOIN :SCHEMA.TaxonSpecies ts ON gsa.taxon_id = ts.taxon_id and ts.org_abbrev = ':ORG_ABBREV' LEFT JOIN dots.geneinstance gi ON gf.na_feature_id = gi.na_feature_id INNER JOIN sres.TaxonName species_name ON ts.species_taxon_id = species_name.taxon_id LEFT JOIN ( @@ -116,23 +117,24 @@ AND edr.external_database_id = ed.external_database_id AND ed.name = 'gassAWB_dbxref_gene2Deprecated_RSRC' ) deprecated ON gf.na_feature_id = deprecated.gene_na_feature_id - LEFT JOIN ( - SELECT gene_source_id, total_hts_snps, hts_nonsynonymous_snps, hts_stop_codon_snps,hts_noncoding_snps,hts_synonymous_snps, - case when (hts_nonsynonymous_snps is null) then 0 - when (hts_synonymous_snps = 0) then 0 - else round ((hts_nonsynonymous_snps/ hts_synonymous_snps), 2) end as hts_nonsyn_syn_ratio - FROM ( - select gene_source_id, - count(*) as total_hts_snps, - sum(has_nonsynonymous_allele) as hts_nonsynonymous_snps, - sum(has_stop_codon) as hts_stop_codon_snps, - sum(is_noncoding_snp) as hts_noncoding_snps, - count(*) - sum(has_nonsynonymous_allele) - sum(has_stop_codon) - sum(is_noncoding_snp) as hts_synonymous_snps - FROM :ORG_ABBREVSnpAttributes - WHERE gene_source_id is not null - GROUP by gene_source_id - ) t - ) tothtssnps ON gf.source_id = tothtssnps.gene_source_id + -- NEED NEW TUNING TABLE ONCE NGS SNP WORK IS DONE + -- LEFT JOIN ( + -- SELECT gene_source_id, total_hts_snps, hts_nonsynonymous_snps, 
hts_stop_codon_snps,hts_noncoding_snps,hts_synonymous_snps, + -- case when (hts_nonsynonymous_snps is null) then 0 + -- when (hts_synonymous_snps = 0) then 0 + -- else round ((hts_nonsynonymous_snps/ hts_synonymous_snps), 2) end as hts_nonsyn_syn_ratio + -- FROM ( + -- select gene_source_id, + -- count(*) as total_hts_snps, + -- sum(has_nonsynonymous_allele) as hts_nonsynonymous_snps, + -- sum(has_stop_codon) as hts_stop_codon_snps, + -- sum(is_noncoding_snp) as hts_noncoding_snps, + -- count(*) - sum(has_nonsynonymous_allele) - sum(has_stop_codon) - sum(is_noncoding_snp) as hts_synonymous_snps + -- FROM :ORG_ABBREVSnpAttributes + -- WHERE gene_source_id is not null + -- GROUP by gene_source_id + -- ) t + -- ) tothtssnps ON gf.source_id = tothtssnps.gene_source_id LEFT JOIN ( SELECT ssg.sequence_id as gene_na_feature_id, sg.name FROM dots.SequenceSequenceGroup ssg, @@ -191,6 +193,7 @@ GROUP BY dbna.na_feature_id ) entrez_table ON gf.na_feature_id = entrez_table.na_feature_id LEFT JOIN ( + -- TODO: PERFORMANCE SELECT drnf.na_feature_id, substr(string_agg(dr.primary_identifier, ';' order by dr.primary_identifier), 1, 900) as old_ids FROM dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed @@ -208,7 +211,8 @@ AND (gf.is_predicted != 1 OR gf.is_predicted is null) AND tn.name_class = 'scientific name' AND tn.taxon_id NOT IN (SELECT o.taxon_id FROM apidb.Organism o WHERE o.is_annotated_genome=0) - AND tn.name not in ('Plasmodium gallinaceum','Plasmodium reichenowi') + -- JB: NOT SURE WHY THERE ARE COMMENTED. 
(if needed, move to model) + --AND tn.name not in ('Plasmodium gallinaceum','Plasmodium reichenowi') ), transcript AS ( SELECT DISTINCT t.parent_id as transcript_parent_id, @@ -233,6 +237,8 @@ utr_lengths.five_prime_utr_length, utr_lengths.three_prime_utr_length FROM dots.Transcript t + INNER JOIN dots.genefeature gf on t.parent_id = gf.na_feature_id + INNER JOIN dots.nasequence nas on gf.na_sequence_id = nas.na_sequence_id and nas.taxon_id = :TAXON_ID LEFT JOIN dots.RnaType rt1 ON t.na_feature_id = rt1.parent_id LEFT JOIN dots.SplicedNaSequence sns ON t.na_sequence_id = sns.na_sequence_id INNER JOIN sres.OntologyTerm tso ON t.sequence_ontology_id = tso.ontology_term_id @@ -256,7 +262,7 @@ GROUP BY parent_id ) three_prime ON t.na_feature_id = three_prime.parent_id ) utr_lengths ON t.na_feature_id = utr_lengths.transcript_na_feature_id - LEFT JOIN :ORG_ABBREVTranscriptUniprot transcript_uniprot ON t.na_feature_id = transcript_uniprot.na_feature_id + LEFT JOIN :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp transcript_uniprot ON t.na_feature_id = transcript_uniprot.na_feature_id LEFT JOIN ( SELECT na_feature_id, max(product) as product FROM apidb.TranscriptProduct @@ -272,7 +278,9 @@ tl.is_top_level=1 ) SELECT DISTINCT - genefeat.project_id, + ':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, + current_timestamp as modification_date, transcript.transcript_source_id AS source_id, -- first the gene attributes: genefeat.gene_source_id, @@ -302,12 +310,12 @@ genefeat.gene_zoom_context_start, genefeat.gene_zoom_context_end, genefeat.orthomcl_name, - genefeat.gene_total_hts_snps, - genefeat.gene_hts_nonsynonymous_snps, - genefeat.gene_hts_stop_codon_snps, - genefeat.gene_hts_noncoding_snps, - genefeat.gene_hts_synonymous_snps, - genefeat.gene_hts_nonsyn_syn_ratio, + -- genefeat.gene_total_hts_snps, + -- genefeat.gene_hts_nonsynonymous_snps, + -- genefeat.gene_hts_stop_codon_snps, + -- genefeat.gene_hts_noncoding_snps, + -- genefeat.gene_hts_synonymous_snps, + -- 
genefeat.gene_hts_nonsyn_syn_ratio, genefeat.comment_string, transcript.uniprot_id, transcript.uniprot_id_internal, genefeat.gene_entrez_id, @@ -375,55 +383,60 @@ transcript.three_prime_utr_length FROM genefeat INNER JOIN transcript ON genefeat. gene_na_feature_id = transcript.transcript_parent_id - LEFT JOIN :ORG_ABBREVproteinattributes pa ON transcript.transcript_source_id = pa.transcript_source_id AND pa.rank_in_transcript = 1 + LEFT JOIN :SCHEMA.proteinattributes pa ON pa.org_abbrev = ':ORG_ABBREV' and transcript.transcript_source_id = pa.transcript_source_id AND pa.rank_in_transcript = 1 ORDER BY taxon_id, source_id :DECLARE_PARTITION; - - UPDATE :ORG_ABBREVTranscriptAttributes ta - SET exon_count = (SELECT count(*) + 1 FROM apidb.IntronLocation il WHERE il.parent_id = ta.na_feature_id AND il.end_max - il.start_min + 1 > 10 ) + -- ADDED ORG_ABBREV filter in 2 places just in case + UPDATE :SCHEMA.TranscriptAttributes ta + SET exon_count = (SELECT count(*) + 1 FROM apidb.IntronLocation il WHERE ta.org_abbrev = ':ORG_ABBREV' AND il.parent_id = ta.na_feature_id AND il.end_max - il.start_min + 1 > 10 ) WHERE ta.project_id = 'TriTrypDB' + AND ta.org_abbrev = ':ORG_ABBREV' ; - UPDATE :ORG_ABBREVTranscriptAttributes gaup + UPDATE :SCHEMA.TranscriptAttributes gaup SET gene_paralog_number = ( SELECT count(distinct gene_source_id) - FROM :ORG_ABBREVTranscriptAttributes g1 + FROM :SCHEMA.TranscriptAttributes g1 WHERE g1.orthomcl_name = gaup.orthomcl_name AND g1.organism = gaup.organism AND gaup.gene_source_id != g1.gene_source_id + AND g1.org_abbrev = ':ORG_ABBREV' ), gene_ortholog_number = ( SELECT count(distinct gene_source_id) - FROM :ORG_ABBREVTranscriptAttributes g1 + FROM :SCHEMA.TranscriptAttributes g1 WHERE g1.orthomcl_name = gaup.orthomcl_name AND g1.organism != gaup.organism + AND g1.org_abbrev = ':ORG_ABBREV' ) WHERE (gaup.gene_type = 'protein coding' or gaup.gene_type = 'protein coding gene') + and gaup.org_abbrev = ':ORG_ABBREV' ; - - UPDATE 
:ORG_ABBREVTranscriptAttributes - SET gene_id = gene_na_feature_id + (select coalesce(max(gene_id), 0) from dots.gene) +-- TODO: test this + UPDATE :SCHEMA.TranscriptAttributes + SET gene_id = gene_na_feature_id + (select nextval('dots.gene_sq')) WHERE gene_id is null + and org_abbrev = ':ORG_ABBREV' ; - - UPDATE :ORG_ABBREVTranscriptAttributes + -- TODO: THIS IS BROKEN. + UPDATE :SCHEMA.TranscriptAttributes SET representative_transcript = ( select min(source_id) - from :ORG_ABBREVTranscriptAttributes ga - where ga.gene_source_id = :ORG_ABBREVTranscriptAttributes .gene_source_id + from :SCHEMA.TranscriptAttributes ga + where ga.gene_source_id = :SCHEMA.TranscriptAttributes.gene_source_id ) WHERE representative_transcript is null AND gene_id is not null @@ -431,11 +444,12 @@ ; - + -- TODO: THIS IS BROKEN. UPDATE :ORG_ABBREVTranscriptAttributes SET representative_transcript = source_id WHERE representative_transcript is null ; - -drop table :ORG_ABBREVTranscriptUniprot \ No newline at end of file + -- TODO: THIS IS BROKEN. 
+drop table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp +; diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index 1265054f2e..4373beaeb9 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -30,10 +30,10 @@ - [X] TransmembraneDomains.psql - [X] PdbSimilarity_ix.psql - [X] PdbSimilarity.psql - - [ ] ProteinSequence_ix.psql - - [ ] ProteinSequence.psql - - [ ] ProteinAttributes_ix.psql - - [ ] ProteinAttributes.psql + - [X] ProteinSequence_ix.psql + - [X] ProteinSequence.psql + - [X] ProteinAttributes_ix.psql + - [X] ProteinAttributes.psql - [ ] TranscriptAttributes_ix.psql - [ ] TranscriptAttributes.psql - [ ] CodingSequence_ix.psql From b758e7cf387c6e2b33b94dd412a2055e8eedc2a7 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 13 May 2025 11:26:36 -0400 Subject: [PATCH 008/112] fix quotes and update some tables --- .../psql/webtables/MO/ChIPchipTranscript.psql | 11 ++++------- Model/lib/psql/webtables/MO/CodingSequence.psql | 11 ++++------- .../psql/webtables/MO/GenomicSeqAttributes.psql | 4 ++-- .../lib/psql/webtables/MO/GenomicSequenceId.psql | 2 +- .../webtables/MO/GenomicSequenceSequence.psql | 4 ++-- .../psql/webtables/MO/OrganismAbbreviation.psql | 2 +- .../webtables/MO/OrganismAbbreviationBlast.psql | 2 +- .../webtables/MO/OrganismSelectTaxonRank.psql | 4 ++-- Model/lib/psql/webtables/MO/PdbSimilarity.psql | 4 ++-- Model/lib/psql/webtables/MO/ProteinSequence.psql | 2 +- .../psql/webtables/MO/SignalPeptideDomains.psql | 4 ++-- Model/lib/psql/webtables/MO/TaxonSpecies.psql | 4 ++-- Model/lib/psql/webtables/MO/Taxonomy.psql | 2 +- .../psql/webtables/MO/TranscriptCenDistance.psql | 13 ++++++------- .../psql/webtables/MO/TranscriptSequence.psql | 10 +++------- .../psql/webtables/MO/TransmembraneDomains.psql | 4 ++-- Model/lib/xml/tuningManager/webtables.org | 16 ++++++++-------- 17 files changed, 44 insertions(+), 55 deletions(-) diff --git 
a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql index fe64a5fecf..04065d2b26 100644 --- a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql +++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql @@ -1,8 +1,6 @@ -:CREATE_AND_POPULATE - - - CREATE TABLE ChIPchipTranscript AS + :CREATE_AND_POPULATE SELECT DISTINCT ta.source_id, ta.gene_source_id, ta.project_id, sr.protocol_app_node_id, + ta.project_id, ta.org_abbrev, current_timestamp as modification_date CASE WHEN ta.is_reversed = 0 THEN round(abs(ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)),0) @@ -28,7 +26,7 @@ END END as direction, sr.score1 as score - FROM TranscriptAttributes ta, + FROM :SCHEMA.TranscriptAttributes ta, Results.segmentresult sr, Study.StudyLink sl, Study.Study s @@ -38,7 +36,6 @@ AND lower(s.name) like '%chip%peaks' AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.start_min) <= 3000) or (ta.is_reversed = 1 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.end_max) <= 3000) ) - - + AND ta.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/CodingSequence.psql b/Model/lib/psql/webtables/MO/CodingSequence.psql index e1560b855f..a1682717e9 100644 --- a/Model/lib/psql/webtables/MO/CodingSequence.psql +++ b/Model/lib/psql/webtables/MO/CodingSequence.psql @@ -1,14 +1,11 @@ :CREATE_AND_POPULATE - - - CREATE TABLE CodingSequence AS - SELECT ta.source_id, ta.project_id, + SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, + ta.source_id, SUBSTR(sns.sequence, tf.translation_start::INTEGER, tf.translation_stop::INTEGER - tf.translation_start::INTEGER + 1) as sequence - FROM TranscriptAttributes ta, dots.SplicedNaSequence sns, dots.TranslatedAaFeature tf + FROM :SCHEMA.TranscriptAttributes ta, dots.SplicedNaSequence sns, dots.TranslatedAaFeature 
tf WHERE ta.source_id = sns.source_id AND ta.na_feature_id = tf.na_feature_id - - + AND ta.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql index 6b964c4328..d00a1a5e8d 100644 --- a/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql +++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql @@ -1,7 +1,7 @@ :CREATE_AND_POPULATE SELECT - :PROJECT_ID as project_id, - :ORG_ABBREV as org_abbrev, + ':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, SUBSTR(sequence.source_id, 1, 60) AS source_id, sequence.a_count, sequence.c_count, sequence.g_count, sequence.t_count, diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql index ee30a1c85f..489b450430 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceId.psql @@ -1,6 +1,6 @@ :CREATE_AND_POPULATE SELECT DISTINCT substr(id, 1, 60) as id, substr(sequence, 1, 60) AS sequence, - :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, CURRENT_TIMESTAMP as modification_date FROM ( SELECT ns.source_id as id, ns.source_id as sequence FROM dots.NaSequence ns, sres.OntologyTerm oterm diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql index 86d8919c6b..bc1f31fb20 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql @@ -1,6 +1,6 @@ :CREATE_AND_POPULATE - SELECT :PROJECT_ID as project_id, - :ORG_ABBREV as org_abbrev, + SELECT ':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, sa.source_id, ns.sequence diff --git 
a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql index 5490dbfaad..bcdedad216 100644 --- a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql +++ b/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql @@ -1,6 +1,6 @@ :CREATE_AND_POPULATE select tn.name as organism, o.name_for_filenames, - :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, CURRENT_TIMESTAMP as modification_date from apidb.Organism o, sres.TaxonName tn where o.taxon_id = tn.taxon_id and tn.name_class = 'scientific name' diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql index cee5ffe550..faf2286741 100644 --- a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql +++ b/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql @@ -1,5 +1,5 @@ :CREATE_AND_POPULATE - select sub.*, :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, CURRENT_TIMESTAMP as modification_date + select sub.*, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, CURRENT_TIMESTAMP as modification_date from ( SELECT organism, parent, abbreviation FROM OrganismAbbreviationWS diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql index d37233b9b2..34c531b5c8 100644 --- a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql +++ b/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql @@ -26,8 +26,8 @@ AND tn1.taxon_id = o.taxon_id ) SELECT - :PROJECT_ID as project_id - , :ORG_ABBREV as org_abbrev + ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev , current_timestamp as modification_date , organisms.organism , organisms.public_abbrev diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity.psql b/Model/lib/psql/webtables/MO/PdbSimilarity.psql index 7a2644ce2b..b3de1cfe9b 100644 --- 
a/Model/lib/psql/webtables/MO/PdbSimilarity.psql +++ b/Model/lib/psql/webtables/MO/PdbSimilarity.psql @@ -1,6 +1,6 @@ :CREATE_AND_POPULATE - SELECT :PROJECT_ID as project_id, - :ORG_ABBREV as org_abbrev, + SELECT ':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, ta.source_id, eas.source_id AS pdb_chain, substr(eas.description, 1, 100) AS pdb_title, diff --git a/Model/lib/psql/webtables/MO/ProteinSequence.psql b/Model/lib/psql/webtables/MO/ProteinSequence.psql index 9517f9436b..68a8664412 100644 --- a/Model/lib/psql/webtables/MO/ProteinSequence.psql +++ b/Model/lib/psql/webtables/MO/ProteinSequence.psql @@ -3,7 +3,7 @@ SELECT distinct source_id, aa_sequence_id FROM ProteinAttributes where org_abbrev = ':ORG_ABBREV') SELECT pa.source_id, tas.sequence, - :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, current_timestamp as modification_date + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM pAttr pa, dots.TranslatedAaSequence tas WHERE pa.aa_sequence_id = tas.aa_sequence_id :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql index 2ce374b972..a23b7ea12b 100644 --- a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql +++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql @@ -1,7 +1,7 @@ :CREATE_AND_POPULATE SELECT - :PROJECT_ID as project_id - , :ORG_ABBREV as org_abbrev + ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev , current_timestamp as modification_date , gf.source_id gene_source_id , t.source_id transcript_source_id diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies.psql b/Model/lib/psql/webtables/MO/TaxonSpecies.psql index d0aa9578a2..b7d15cbbdc 100644 --- a/Model/lib/psql/webtables/MO/TaxonSpecies.psql +++ b/Model/lib/psql/webtables/MO/TaxonSpecies.psql @@ -11,8 +11,8 @@ WHERE cte.parent_id = sub.taxon_id ) SELECT c.taxon_id, c.parent_id as 
species_taxon_id, - :PROJECT_ID as project_id, - :ORG_ABBREV as org_abbrev, + ':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM cte c, sres.taxon t WHERE t.taxon_id = c.parent_id diff --git a/Model/lib/psql/webtables/MO/Taxonomy.psql b/Model/lib/psql/webtables/MO/Taxonomy.psql index 8fe3bf229c..87f0a5c1d9 100644 --- a/Model/lib/psql/webtables/MO/Taxonomy.psql +++ b/Model/lib/psql/webtables/MO/Taxonomy.psql @@ -18,6 +18,6 @@ AND tax.name != 'root' ) SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over() as orderNum, - :PROJECT_ID as project_id, :ORG_ABBREV as org_abbrev, current_timestamp as modification_date + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM (SELECT cte.* FROM cte ORDER BY path) t :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql index 9715d5f08b..f1f9056005 100644 --- a/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql +++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql @@ -1,19 +1,18 @@ :CREATE_AND_POPULATE - - - CREATE TABLE TranscriptCenDistance AS - SELECT DISTINCT tl.feature_source_id AS transcript, + SELECT DISTINCT + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, + tl.feature_source_id AS transcript, LEAST(ABS(mfl.start_min - tl.end_max), ABS(mfl.end_max - tl.start_min)) AS centromere_distance, tl.sequence_source_id AS genomic_sequence FROM apidb.TranscriptLocation tl, apidb.FeatureLocation mfl, - sres.OntologyTerm so + dots.nasequence nas, sres.OntologyTerm so WHERE tl.na_sequence_id = mfl.na_sequence_id AND mfl.feature_type = 'Miscellaneous' AND mfl.sequence_ontology_id = so.ontology_term_id AND so.name = 'centromere' AND tl.is_top_level = 1 - - + and mfl.na_sequence_id = nas.na_sequence_id + and nas.taxon_id = :TAXON_ID :DECLARE_PARTITION; diff 
--git a/Model/lib/psql/webtables/MO/TranscriptSequence.psql b/Model/lib/psql/webtables/MO/TranscriptSequence.psql index 0cc12e2ff2..edccc1d800 100644 --- a/Model/lib/psql/webtables/MO/TranscriptSequence.psql +++ b/Model/lib/psql/webtables/MO/TranscriptSequence.psql @@ -1,11 +1,7 @@ :CREATE_AND_POPULATE - - - CREATE TABLE :ORG_ABBREVTranscriptSequence AS - SELECT ta.source_id, ta.project_id, sns.sequence - FROM :ORG_ABBREVTranscriptAttributes ta, dots.SplicedNaSequence sns + SELECT ta.source_id, ta.project_id, ta.org_abbrev, sns.sequence, current_timestamp as modification_date + FROM :SCHEMA.TranscriptAttributes ta, dots.SplicedNaSequence sns WHERE ta.source_id = sns.source_id - - + AND ta.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql index 99406037ee..d8915ea386 100644 --- a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql +++ b/Model/lib/psql/webtables/MO/TransmembraneDomains.psql @@ -1,6 +1,6 @@ :CREATE_AND_POPULATE - SELECT :PROJECT_ID as project_id - , :ORG_ABBREV as org_abbrev + SELECT ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev , current_timestamp as modification_date , ta.source_id as transcript_source_id , ta.gene_source_id AS gene_source_id diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index 4373beaeb9..6373429173 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -36,18 +36,18 @@ - [X] ProteinAttributes.psql - [ ] TranscriptAttributes_ix.psql - [ ] TranscriptAttributes.psql - - [ ] CodingSequence_ix.psql - - [ ] CodingSequence.psql + - [s] CodingSequence_ix.psql + - [s] CodingSequence.psql - [ ] IntronUtrCoords_ix.psql - [ ] IntronUtrCoords.psql - - [ ] TranscriptCenDistance_ix.psql - - [ ] TranscriptCenDistance.psql + - [s] TranscriptCenDistance_ix.psql + - [s] TranscriptCenDistance.psql - [ ] 
TranscriptPathway_ix.psql - [ ] TranscriptPathway.psql - - [ ] TranscriptSequence_ix.psql - - [ ] TranscriptSequence.psql - - [ ] ChIPchipTranscript_ix.psql - - [ ] ChIPchipTranscript.psql + - [s] TranscriptSequence_ix.psql + - [s] TranscriptSequence.psql + - [s] ChIPchipTranscript_ix.psql + - [s] ChIPchipTranscript.psql - Gene - [ ] GeneId_ix.psql From ce9d81e8ae45be7932cec71f3815c3914e93e4ca Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 13 May 2025 11:57:18 -0400 Subject: [PATCH 009/112] fix _ix files --- .../webtables/MO/ChIPchipTranscript_ix.psql | 2 +- .../psql/webtables/MO/ChrCopyNumbers_ix.psql | 8 +-- .../psql/webtables/MO/CodingSequence_ix.psql | 2 +- Model/lib/psql/webtables/MO/EqtlSpan_ix.psql | 4 +- .../psql/webtables/MO/GeneAttributes_ix.psql | 42 ++++++------- .../psql/webtables/MO/GeneCopyNumbers_ix.psql | 4 +- .../lib/psql/webtables/MO/GeneGoTable_ix.psql | 2 +- .../lib/psql/webtables/MO/GeneGoTerms_ix.psql | 2 +- Model/lib/psql/webtables/MO/GeneId_ix.psql | 10 +-- .../webtables/MO/GeneIntJuncStats_ix.psql | 2 +- .../webtables/MO/GeneIntronJunction_ix.psql | 8 +-- .../psql/webtables/MO/GeneLocations_ix.psql | 4 +- .../webtables/MO/GeneMaxIntronGIJ_ix.psql | 2 +- .../psql/webtables/MO/GeneModelDump_ix.psql | 4 +- .../webtables/MO/GenomicSeqAttributes_ix.psql | 8 +-- .../webtables/MO/GenomicSequenceId_ix.psql | 6 +- .../MO/GenomicSequenceSequence_ix.psql | 2 +- .../psql/webtables/MO/GoTermSummary_ix.psql | 4 +- .../psql/webtables/MO/IntronUtrCoords_ix.psql | 8 +-- .../psql/webtables/MO/NameMappingGIJ_ix.psql | 2 +- .../webtables/MO/OrganismAttributes_ix.psql | 2 +- .../webtables/MO/PathwaysGeneTable_ix.psql | 2 +- .../psql/webtables/MO/ProfileSamples_ix.psql | 8 +-- Model/lib/psql/webtables/MO/Profile_ix.psql | 12 ++-- .../webtables/MO/ProteinAttributes_ix.psql | 4 +- .../psql/webtables/MO/ProteinSequence_ix.psql | 2 +- .../webtables/MO/SignalPeptideDomains_ix.psql | 8 +-- Model/lib/psql/webtables/MO/TFBSGene_ix.psql | 4 +- 
Model/lib/psql/webtables/MO/Taxonomy_ix.psql | 4 +- .../webtables/MO/TranscriptAttributes_ix.psql | 62 +++++++++---------- .../MO/TranscriptCenDistance_ix.psql | 4 +- .../webtables/MO/TranscriptPathway_ix.psql | 8 +-- .../webtables/MO/TranscriptSequence_ix.psql | 2 +- .../webtables/MO/TransmembraneDomains_ix.psql | 4 +- 34 files changed, 126 insertions(+), 126 deletions(-) diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql index cfe11ee5c6..bd8aaf4411 100644 --- a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql +++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.:ORG_ABBREVchpgene_geneid_idx ON :SCHEMA.:ORG_ABBREVChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id) + create index :SCHEMA.chpgene_geneid_idx ON :SCHEMA.ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id) ; diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql index d2d0448a29..9d0e711555 100644 --- a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql +++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql @@ -1,9 +1,9 @@ - CREATE INDEX :SCHEMA.:ORG_ABBREVChrCN_ix - ON :SCHEMA.:ORG_ABBREVChrCopyNumbers (input_pan_id, na_sequence_id) + CREATE INDEX :SCHEMA.ChrCN_ix + ON :SCHEMA.ChrCopyNumbers (input_pan_id, na_sequence_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVChrCN_output - ON :SCHEMA.:ORG_ABBREVChrCopyNumbers (output_pan_id) + CREATE INDEX :SCHEMA.ChrCN_output + ON :SCHEMA.ChrCopyNumbers (output_pan_id) ; diff --git a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql index 725030a983..7d3114121f 100644 --- a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.:ORG_ABBREVCodSeq_ix on :SCHEMA.:ORG_ABBREVCodingSequence (source_id, 
project_id) + create index :SCHEMA.CodSeq_ix on :SCHEMA.CodingSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql index 2a14698811..f4babf6466 100644 --- a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql +++ b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.:ORG_ABBREVeqtlSpan_ix - on :SCHEMA.:ORG_ABBREVeqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score) + create index :SCHEMA.eqtlSpan_ix + on :SCHEMA.eqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score) ; diff --git a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql index 27f307d5dc..bd524c4b7a 100644 --- a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql @@ -1,47 +1,47 @@ - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_srcPrj - ON :SCHEMA.:ORG_ABBREVGeneAttributes (source_id) + CREATE UNIQUE INDEX :SCHEMA.GeneAttr_srcPrj + ON :SCHEMA.GeneAttributes (source_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_exon_ix - ON :SCHEMA.:ORG_ABBREVGeneAttributes (exon_count, source_id, project_id) + CREATE INDEX :SCHEMA.GeneAttr_exon_ix + ON :SCHEMA.GeneAttributes (exon_count, source_id, project_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_loc_ix - ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated) + CREATE INDEX :SCHEMA.GeneAttr_loc_ix + ON :SCHEMA.GeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_feat_ix - ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed) + CREATE INDEX :SCHEMA.GeneAttr_feat_ix + ON :SCHEMA.GeneAttributes (na_feature_id, 
na_sequence_id, start_min, end_max, is_reversed) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_orthoname_ix ON :SCHEMA.:ORG_ABBREVGeneAttributes ( + CREATE INDEX :SCHEMA.GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes ( orthomcl_name, source_id, taxon_id, gene_type, na_feature_id, na_sequence_id, start_min, end_max, organism, species, product, project_id ) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_ortholog_ix - ON :SCHEMA.:ORG_ABBREVGeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id) + CREATE INDEX :SCHEMA.GeneAttr_ortholog_ix + ON :SCHEMA.GeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_orgsrc_ix - ON :SCHEMA.:ORG_ABBREVGeneAttributes (organism, source_id, na_sequence_id, start_min, end_max) + CREATE INDEX :SCHEMA.GeneAttr_orgsrc_ix + ON :SCHEMA.GeneAttributes (organism, source_id, na_sequence_id, start_min, end_max) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_prjsrc_ix - ON :SCHEMA.:ORG_ABBREVGeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0)) + CREATE INDEX :SCHEMA.GeneAttr_prjsrc_ix + ON :SCHEMA.GeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0)) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_txid_ix - ON :SCHEMA.:ORG_ABBREVGeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id) + CREATE INDEX :SCHEMA.GeneAttr_txid_ix + ON :SCHEMA.GeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_ids_ix - ON :SCHEMA.:ORG_ABBREVGeneAttributes (na_feature_id, source_id, project_id) + CREATE INDEX :SCHEMA.GeneAttr_ids_ix + ON :SCHEMA.GeneAttributes (na_feature_id, source_id, project_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneAttr_loc_intjunc_ix - ON :SCHEMA.:ORG_ABBREVGeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX) + CREATE INDEX :SCHEMA.GeneAttr_loc_intjunc_ix + ON :SCHEMA.GeneAttributes 
(NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX) ; diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql index 0108c24e21..4cb38aaa7f 100644 --- a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql @@ -1,4 +1,4 @@ - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneCN_ix - ON :SCHEMA.:ORG_ABBREVGeneCopyNumbers (input_pan_id, na_sequence_id) + CREATE INDEX :SCHEMA.GeneCN_ix + ON :SCHEMA.GeneCopyNumbers (input_pan_id, na_sequence_id) ; diff --git a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql index 50f4e8b3bb..a2ef5c49ec 100644 --- a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.:ORG_ABBREVggtab_ix ON :SCHEMA.:ORG_ABBREVGeneGoTable + create index :SCHEMA.ggtab_ix ON :SCHEMA.GeneGoTable (source_id, project_id, go_id, transcript_ids, is_not, go_term_name, ontology, source, evidence_code, reference, evidence_code_parameter, sort_key) ; diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql index 8d6745ff93..d8456c06e7 100644 --- a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql @@ -1,4 +1,4 @@ - create index ggt_ix ON :SCHEMA.:ORG_ABBREVGeneGoTerms + create index ggt_ix ON :SCHEMA.GeneGoTerms (gene_source_id, transcript_source_id, ontology, go_id, go_term_id, go_term_name, source, evidence_code, reference, evidence_code_parameter, aa_sequence_id, is_not) diff --git a/Model/lib/psql/webtables/MO/GeneId_ix.psql b/Model/lib/psql/webtables/MO/GeneId_ix.psql index 55e4ca91c9..2f2fd86202 100644 --- a/Model/lib/psql/webtables/MO/GeneId_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneId_ix.psql @@ -1,15 +1,15 @@ - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_gene_idx ON :SCHEMA.:ORG_ABBREVGeneId (gene, id) + CREATE INDEX 
:SCHEMA.GeneId_gene_idx ON :SCHEMA.GeneId (gene, id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_id_idx ON :SCHEMA.:ORG_ABBREVGeneId (id, gene) + CREATE INDEX :SCHEMA.GeneId_id_idx ON :SCHEMA.GeneId (id, gene) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_uniqid_idx ON :SCHEMA.:ORG_ABBREVGeneId (unique_mapping, id, gene) + CREATE INDEX :SCHEMA.GeneId_uniqid_idx ON :SCHEMA.GeneId (unique_mapping, id, gene) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_lowid_idx ON :SCHEMA.:ORG_ABBREVGeneId (lower(id), gene) + CREATE INDEX :SCHEMA.GeneId_lowid_idx ON :SCHEMA.GeneId (lower(id), gene) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGeneId_uniqlowid_idx ON :SCHEMA.:ORG_ABBREVGeneId (unique_mapping, lower(id), gene) + CREATE INDEX :SCHEMA.GeneId_uniqlowid_idx ON :SCHEMA.GeneId (unique_mapping, lower(id), gene) ; diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql index 063e77a1c3..c295007639 100644 --- a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.:ORG_ABBREVGeneIntJuncStat_ix on :SCHEMA.:ORG_ABBREVGeneIntJuncStats (na_sequence_id) + create index :SCHEMA.GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id) ; diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql index 6e650ac67c..14c8fdbd4a 100644 --- a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql @@ -1,11 +1,11 @@ - create index :SCHEMA.:ORG_ABBREVgijnew_loc_ix on :SCHEMA.:ORG_ABBREVGeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed) + create index :SCHEMA.gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed) ; - create index :SCHEMA.:ORG_ABBREVgijnew_gnscid_ix on :SCHEMA.:ORG_ABBREVGeneIntronJunction (intron_feature_id) + create index 
:SCHEMA.gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id) ; - create index :SCHEMA.:ORG_ABBREVgijnew_txnloc_ix - on :SCHEMA.:ORG_ABBREVGeneIntronJunction + create index :SCHEMA.gijnew_txnloc_ix + on :SCHEMA.GeneIntronJunction (taxon_id, na_sequence_id, segment_start, segment_end, is_reversed, total_unique, total_isrpm, annotated_intron) diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql index 45fdf888d5..135dfff694 100644 --- a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql @@ -1,4 +1,4 @@ - create :SCHEMA.:ORG_ABBREVindex gloc_ix - on :SCHEMA.:ORG_ABBREVGeneLocations (source_id, locations) + create :SCHEMA.index gloc_ix + on :SCHEMA.GeneLocations (source_id, locations) ; diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql index 0491ee072b..927c41fb89 100644 --- a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql @@ -1,3 +1,3 @@ - CREATE INDEX :SCHEMA.:ORG_ABBREVGnMxIntGIJ_ix on :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJ (gene_source_id,protocol_app_node_id) + CREATE INDEX :SCHEMA.GnMxIntGIJ_ix on :SCHEMA.GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id) ; diff --git a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql index 4fa21c53fa..25fc836319 100644 --- a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql @@ -1,5 +1,5 @@ - create index :SCHEMA.:ORG_ABBREVgmd_ix - on :SCHEMA.:ORG_ABBREVGeneModelDump + create index :SCHEMA.gmd_ix + on :SCHEMA.GeneModelDump (source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids) ; diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql index 
e9b08230da..709e8d2934 100644 --- a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql @@ -1,11 +1,11 @@ - create unique index :SCHEMA.:ORG_ABBREVpk_SeqAttr_ ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (lower(source_id), project_id) + create unique index :SCHEMA.pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id) ; - create unique index :SCHEMA.:ORG_ABBREVSeqAttr_source_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (source_id) + create unique index :SCHEMA.SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id) ; - create unique index :SCHEMA.:ORG_ABBREVSeqAttr_naseqid ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (na_sequence_id) + create unique index :SCHEMA.SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id) ; - create unique index :SCHEMA.:ORG_ABBREVSeqAttr_taxsrc_id ON :SCHEMA.:ORG_ABBREVGenomicSeqAttributes (taxon_id, source_id) + create unique index :SCHEMA.SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id) ; diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql index c8b82387ac..093edf9acb 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql @@ -1,9 +1,9 @@ - CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_sequence_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (sequence, id) + CREATE INDEX :SCHEMA.GenSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_id_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (id, sequence) + CREATE INDEX :SCHEMA.GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVGenSeqId_lowid_idx ON :SCHEMA.:ORG_ABBREVGenomicSequenceId (lower(id), sequence) + CREATE INDEX :SCHEMA.GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence) ; diff --git 
a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql index 8cf38e086b..ea41f21285 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.:ORG_ABBREVGenomicSeq_ix on :SCHEMA.:ORG_ABBREVGenomicSequenceSequence (source_id, project_id) + create index :SCHEMA.GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql index aff06120f1..0b0ef12e90 100644 --- a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql +++ b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql @@ -1,7 +1,7 @@ - create index :SCHEMA.:ORG_ABBREVGoTermSum_aaSeqId_idx ON :SCHEMA.:ORG_ABBREVGoTermSummary (aa_sequence_id, go_id, source) + create index :SCHEMA.GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source) ; - create index :SCHEMA.:ORG_ABBREVGoTermSum_plugin_ix ON :SCHEMA.:ORG_ABBREVGoTermSummary + create index :SCHEMA.GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary (ontology, gene_source_id, is_not, is_go_slim, go_id, go_term_name, evidence_code, evidence_category) ; diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql index e844e8da5b..c1359297b9 100644 --- a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql +++ b/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql @@ -1,8 +1,8 @@ - CREATE INDEX :SCHEMA.:ORG_ABBREViuc_srcid_ix - ON :SCHEMA.:ORG_ABBREVIntronUtrCoords (source_id, na_feature_id) + CREATE INDEX :SCHEMA.iuc_srcid_ix + ON :SCHEMA.IntronUtrCoords (source_id, na_feature_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREViuc_nfid_ix - ON :SCHEMA.:ORG_ABBREVIntronUtrCoords (na_feature_id, source_id) + CREATE INDEX :SCHEMA.iuc_nfid_ix + ON :SCHEMA.IntronUtrCoords (na_feature_id, source_id) ; diff 
--git a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql index 6a25756c1e..443efe39c8 100644 --- a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql +++ b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.:ORG_ABBREVnamemappinggij_ix on :SCHEMA.:ORG_ABBREVNameMappingGIJ (junctions_pan_id,exp_pan_id) + create index :SCHEMA.namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id) ; diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql index 27ab99b34a..b018637f7d 100644 --- a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql @@ -1,3 +1,3 @@ -create unique index :SCHEMA.:ORG_ABBREVOrganism_sourceId_idx ON :SCHEMA.:ORG_ABBREVOrganismAttributes (source_id) +create unique index :SCHEMA.Organism_sourceId_idx ON :SCHEMA.OrganismAttributes (source_id) ; diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql index fe2988a30e..2cc01784ca 100644 --- a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql +++ b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.:ORG_ABBREVpgt_ix on :SCHEMA.:ORG_ABBREVPathwaysGeneTable + create index :SCHEMA.pgt_ix on :SCHEMA.PathwaysGeneTable (gene_source_id, project_id, pathway_source_id, pathway_name, reactions, enzyme, expasy_url, pathway_source, exact_match) diff --git a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql index 82054a5477..fceecb4adf 100644 --- a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql +++ b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql @@ -1,12 +1,12 @@ - create :SCHEMA.:ORG_ABBREVindex psamp_ix - on :SCHEMA.:ORG_ABBREVProfileSamples + create :SCHEMA.index psamp_ix + on :SCHEMA.ProfileSamples 
(dataset_name, profile_type, study_id, node_order_num, protocol_app_node_id, profile_set_suffix, study_name, node_type, protocol_app_node_name) ; - create index :SCHEMA.:ORG_ABBREVpsampstdy_ix - on :SCHEMA.:ORG_ABBREVProfileSamples + create index :SCHEMA.psampstdy_ix + on :SCHEMA.ProfileSamples (study_name, node_type, profile_type, node_order_num, protocol_app_node_id, profile_set_suffix, study_id, protocol_app_node_name, dataset_name) diff --git a/Model/lib/psql/webtables/MO/Profile_ix.psql b/Model/lib/psql/webtables/MO/Profile_ix.psql index 0839baa093..e4aebd5c31 100644 --- a/Model/lib/psql/webtables/MO/Profile_ix.psql +++ b/Model/lib/psql/webtables/MO/Profile_ix.psql @@ -1,12 +1,12 @@ - create index :SCHEMA.:ORG_ABBREVexprof_idx - on :SCHEMA.:ORG_ABBREVProfile (source_id, profile_type, profile_set_name) + create index :SCHEMA.exprof_idx + on :SCHEMA.Profile (source_id, profile_type, profile_set_name) ; - create index :SCHEMA.:ORG_ABBREVprofset_idx - on :SCHEMA.:ORG_ABBREVProfile (profile_set_name, profile_type) + create index :SCHEMA.profset_idx + on :SCHEMA.Profile (profile_set_name, profile_type) ; - create index :SCHEMA.:ORG_ABBREVsrcdset_idx - on :SCHEMA.:ORG_ABBREVProfile (source_id, dataset_subtype, dataset_type) + create index :SCHEMA.srcdset_idx + on :SCHEMA.Profile (source_id, dataset_subtype, dataset_type) ; diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql index 9279d85729..2faff407d3 100644 --- a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql @@ -1,6 +1,6 @@ - CREATE INDEX :SCHEMA.:ORG_ABBREVPA_sourceId ON :SCHEMA.:ORG_ABBREVProteinAttributes (source_id) + CREATE INDEX :SCHEMA.PA_sourceId ON :SCHEMA.ProteinAttributes (source_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVPA_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinAttributes (aa_sequence_id) + CREATE INDEX :SCHEMA.PA_aaSequenceId ON :SCHEMA.ProteinAttributes 
(aa_sequence_id) ; diff --git a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql index 7f37d152ef..a11708b620 100644 --- a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.:ORG_ABBREVProtSeq_ix on :SCHEMA._ORG_ABBREVProteinSequence (source_id, project_id) + create index :SCHEMA.ProtSeq_ix on :SCHEMA._ORG_ABBREVProteinSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql index ddd24897a3..5cd5d15432 100644 --- a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql +++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql @@ -1,8 +1,8 @@ - CREATE INDEX :SCHEMA.:ORG_ABBREVSignalP1_ix - ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (aa_sequence_id) + CREATE INDEX :SCHEMA.SignalP1_ix + ON :SCHEMA.SignalPeptideDomains (aa_sequence_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVSignalP2_ix - ON :SCHEMA.:ORG_ABBREVSignalPeptideDomains (gene_source_id, transcript_source_id, end_max) + CREATE INDEX :SCHEMA.SignalP2_ix + ON :SCHEMA.SignalPeptideDomains (gene_source_id, transcript_source_id, end_max) ; diff --git a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql index 7563dac0c5..5acc3e9f9b 100644 --- a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql +++ b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql @@ -1,6 +1,6 @@ - create index :SCHEMA.:ORG_ABBREVtfbs_geneid_idx ON :SCHEMA.:ORG_ABBREVTFBSGene (gene_source_id, tfbs_na_feature_id) + create index :SCHEMA.tfbs_geneid_idx ON :SCHEMA.TFBSGene (gene_source_id, tfbs_na_feature_id) ; - create index :SCHEMA.:ORG_ABBREVgeneid_tfbs_idx ON :SCHEMA.:ORG_ABBREVTFBSGene (tfbs_na_feature_id,gene_source_id) + create index :SCHEMA.geneid_tfbs_idx ON :SCHEMA.TFBSGene (tfbs_na_feature_id,gene_source_id) ; diff --git 
a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql index b3e8518849..7b29c4aa03 100644 --- a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql +++ b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql @@ -1,5 +1,5 @@ - create index :SCHEMA.:ORG_ABBREVtax_ix - on :SCHEMA.:ORG_ABBREVTaxonomy + create index :SCHEMA.tax_ix + on :SCHEMA.Taxonomy (organism, ordernum, taxon_id, parent_id, ncbi_tax_id, name, rank) ; diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql index 481959c6d7..08b54452aa 100644 --- a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql @@ -1,71 +1,71 @@ - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_sourceId - ON :ORG_ABBREVTranscriptAttributes (source_id) + CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_sourceId + ON TranscriptAttributes (source_id) ; - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_srcPrj - ON :ORG_ABBREVTranscriptAttributes (source_id, gene_source_id, project_id) + CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_srcPrj + ON TranscriptAttributes (source_id, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_genesrc - ON :ORG_ABBREVTranscriptAttributes (gene_source_id, source_id, project_id) + CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_genesrc + ON TranscriptAttributes (gene_source_id, source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_exon_ix - ON :ORG_ABBREVTranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id) + CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_exon_ix + ON TranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_loc_ix - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes + CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_loc_ix + ON :SCHEMA.TranscriptAttributes (na_sequence_id, 
gene_start_min, gene_end_max, is_reversed, na_feature_id, is_deprecated, source_id, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_feat_ix - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id) + CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_feat_ix + ON :SCHEMA.TranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_geneid_ix - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_id, source_id, gene_source_id, project_id) + CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_geneid_ix + ON :SCHEMA.TranscriptAttributes (gene_id, source_id, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTransAttr_orthoname_ix - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id) + CREATE UNIQUE INDEX :SCHEMA.TransAttr_orthoname_ix + ON :SCHEMA.TranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTransAttr_molwt_ix - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id) + CREATE UNIQUE INDEX :SCHEMA.TransAttr_molwt_ix + ON :SCHEMA.TranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_ortholog_ix - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes + CREATE INDEX :SCHEMA.TransAttr_ortholog_ix + ON :SCHEMA.TranscriptAttributes (source_id, na_sequence_id, gene_start_min, gene_end_max, orthomcl_name, gene_source_id, project_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_orgsrc_ix - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max) + CREATE INDEX :SCHEMA.TransAttr_orgsrc_ix + ON :SCHEMA.TranscriptAttributes (organism, source_id, sequence_id, gene_start_min, 
gene_end_max) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_lwrsrc_ix - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id) + CREATE INDEX :SCHEMA.TransAttr_lwrsrc_ix + ON :SCHEMA.TranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_species_ix - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id) + CREATE INDEX :SCHEMA.TransAttr_species_ix + ON :SCHEMA.TranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id) ; CREATE UNIQUE INDEX TrnscrptAttr_geneinfo - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes + ON :SCHEMA.TranscriptAttributes (gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id, protein_source_id, na_sequence_id, length, protein_length, five_prime_utr_length, three_prime_utr_length) ; - CREATE UNIQUE INDEX :SCHEMA.:ORG_ABBREVTranscriptAttr_genenaf - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id) + CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_genenaf + ON :SCHEMA.TranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id) ; - CREATE INDEX :SCHEMA.:ORG_ABBREVTransAttr_locsIds_ix - ON :SCHEMA.:ORG_ABBREVTranscriptAttributes + CREATE INDEX :SCHEMA.TransAttr_locsIds_ix + ON :SCHEMA.TranscriptAttributes (na_sequence_id, start_min, end_max, is_reversed, gene_source_id, source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql index 8b77d66964..ad1c71a2fc 100644 --- a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.:ORG_ABBREVGCent_loc_ix - on :SCHEMA.:ORG_ABBREVTranscriptCenDistance (genomic_sequence, centromere_distance) + create index :SCHEMA.GCent_loc_ix + on 
:SCHEMA.TranscriptCenDistance (genomic_sequence, centromere_distance) ; diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql index 26773299a5..2b05b42f44 100644 --- a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql @@ -1,12 +1,12 @@ - create index :SCHEMA.:ORG_ABBREVTranscriptPath_ix - on :SCHEMA.:ORG_ABBREVTranscriptPathway + create index :SCHEMA.TranscriptPath_ix + on :SCHEMA.TranscriptPathway (gene_source_id, source_id, pathway_source_id, pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway, ec_number_pathway, pathway_source) ; - create index :SCHEMA.:ORG_ABBREVTranscriptPathSource_ix - on :SCHEMA.:ORG_ABBREVTranscriptPathway (pathway_source, + create index :SCHEMA.TranscriptPathSource_ix + on :SCHEMA.TranscriptPathway (pathway_source, gene_source_id, source_id) ; diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql index 73f233d48e..1e1b36617d 100644 --- a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.:ORG_ABBREVXScriptSeq_ix on :SCHEMA.:ORG_ABBREVTranscriptSequence (source_id, project_id) + create index :SCHEMA.XScriptSeq_ix on :SCHEMA.TranscriptSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql index e6d76b992a..613aff3e51 100644 --- a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql +++ b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.:ORG_ABBREVTransDom1_ix - on :SCHEMA.:ORG_ABBREVTransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology) + create index :SCHEMA.TransDom1_ix + on :SCHEMA.TransmembraneDomains 
(tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology) ; From 577e9fd7c9d605e79717e47b92e6919479c33c6f Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Tue, 13 May 2025 14:16:07 -0400 Subject: [PATCH 010/112] wip --- .../psql/webtables/MO/ChIPchipTranscript.psql | 6 +- Model/lib/psql/webtables/MO/EqtlSpan.psql | 9 +- .../webtables/MO/EstAlignmentGeneSummary.psql | 32 ++++--- .../lib/psql/webtables/MO/GeneAttributes.psql | 34 ++++--- .../psql/webtables/MO/GeneCopyNumbers.psql | 13 +-- Model/lib/psql/webtables/MO/GeneGoTable.psql | 21 ++-- Model/lib/psql/webtables/MO/GeneGoTerms.psql | 8 +- Model/lib/psql/webtables/MO/GeneId.psql | 95 +++++++++++-------- .../lib/psql/webtables/MO/GeneLocations.psql | 8 +- .../lib/psql/webtables/MO/GeneModelDump.psql | 13 +-- .../psql/webtables/MO/GeneSummaryFilter.psql | 11 +-- .../lib/psql/webtables/MO/GoTermSummary.psql | 16 ++-- .../psql/webtables/MO/IntronUtrCoords.psql | 13 +-- Model/lib/psql/webtables/MO/TFBSGene.psql | 11 ++- .../psql/webtables/MO/TranscriptPathway.psql | 58 +++-------- Model/lib/xml/tuningManager/tablePruning.txt | 8 +- Model/lib/xml/tuningManager/webtables.org | 83 +++++++++------- 17 files changed, 224 insertions(+), 215 deletions(-) diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql index 04065d2b26..4e1996000d 100644 --- a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql +++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql @@ -28,10 +28,10 @@ sr.score1 as score FROM :SCHEMA.TranscriptAttributes ta, Results.segmentresult sr, - Study.StudyLink sl, - Study.Study s + Study.nodenodeset sl, + Study.nodeset s WHERE sr.na_sequence_id = ta.na_sequence_id - AND s.study_id = sl.study_id + AND s.node_set_id = sl.node_set_id AND sl.protocol_app_node_id = sr.protocol_app_node_id AND lower(s.name) like '%chip%peaks' AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - 
ta.start_min) <= 3000) diff --git a/Model/lib/psql/webtables/MO/EqtlSpan.psql b/Model/lib/psql/webtables/MO/EqtlSpan.psql index 10215fca8e..fdad100be2 100644 --- a/Model/lib/psql/webtables/MO/EqtlSpan.psql +++ b/Model/lib/psql/webtables/MO/EqtlSpan.psql @@ -1,8 +1,6 @@ :CREATE_AND_POPULATE - - - create table eqtlSpan as - SELECT gene_source_id, project_id, haplotype_block_name as hapblock_id, sequence_id, + SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, + gene_source_id, haplotype_block_name as hapblock_id, sequence_id, start_min, end_max, start_max, end_min, max(score) as lod_score, organism FROM (SELECT ga.gene_source_id, ga.project_id, gls.haplotype_block_name, @@ -10,11 +8,12 @@ gls.lod_score_mant * power(10::double precision, gls.lod_score_exp) as score, replace (ga.organism, ' ', '+') as organism FROM dots.ChromosomeElementFeature cef, apidb.NAFeatureHaploblock gls, - dots.ExternalNaSequence ens, dots.NaLocation nl, TranscriptAttributes ga + dots.ExternalNaSequence ens, dots.NaLocation nl, :SCHEMA.TranscriptAttributes ga WHERE gls.na_feature_id = ga.gene_na_feature_id AND cef.name = gls.haplotype_block_name AND nl.na_feature_id = cef.na_feature_id AND cef.na_sequence_id = ens.na_sequence_id + AND ga.org_abbrev = ':ORG_ABBREV' AND (gls.lod_score_mant * power(10::double precision, gls.lod_score_exp)) >= 1.5 ) t GROUP BY gene_source_id, project_id, sequence_id, haplotype_block_name, diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql index 90b4788bce..4dbff1eca8 100644 --- a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql +++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql @@ -1,6 +1,6 @@ - CREATE UNLOGGED TABLE EstAlignmentGene AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp AS SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession, e.library_id, ba.query_taxon_id, 
ba.target_na_sequence_id, ba.target_taxon_id, ba.percent_identity, ba.is_consistent, @@ -11,21 +11,23 @@ AS est_gene_overlap_length, ba.query_bases_aligned / (query_sequence.length) * 100 AS percent_est_bases_aligned, - ga.gene_source_id AS gene + ga.gene_source_id AS gene,':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM dots.BlatAlignment ba, dots.Est e, - apidbtuning.TranscriptAttributes ga, + :SCHEMA.TranscriptAttributes ga, dots.NaSequence query_sequence WHERE e.na_sequence_id = ba.query_na_sequence_id AND ga.na_sequence_id = ba.target_na_sequence_id AND least(ba.target_end, ga.gene_end_max) - greatest(ba.target_start, ga.gene_start_min) >= 0 AND query_sequence.na_sequence_id = ba.query_na_sequence_id + AND ga.org_abbrev = ':ORG_ABBREV' ; - CREATE UNLOGGED TABLE EstAlignmentNoGene AS - SELECT * from EstAlignmentGene WHERE 1=0 UNION /* define datatype for null column */ + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp AS + SELECT * from :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp WHERE 1=0 UNION /* define datatype for null column */ SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession, e.library_id, ba.query_taxon_id, ba.target_na_sequence_id, ba.target_taxon_id, ba.percent_identity, ba.is_consistent, @@ -34,37 +36,39 @@ NULL AS est_gene_overlap_length, ba.query_bases_aligned / (query_sequence.length) * 100 AS percent_est_bases_aligned, - NULL AS gene + NULL AS gene,':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM dots.BlatAlignment ba, dots.Est e, dots.AssemblySequence aseq, dots.NaSequence sequence, dots.NaSequence query_sequence WHERE e.na_sequence_id = ba.query_na_sequence_id AND e.na_sequence_id = query_sequence.na_sequence_id AND aseq.na_sequence_id = ba.query_na_sequence_id AND ba.target_na_sequence_id = sequence.na_sequence_id + AND sequence.taxon_id = :TAXON_ID AND ba.blat_alignment_id IN ( /* set of blat_alignment_ids 
not in in first leg of UNION */ /* (because they overlap no genes) */ SELECT ba.blat_alignment_id FROM dots.BlatAlignment ba, dots.NaSequence query_sequence, - sres.OntologyTerm so + sres.OntologyTerm so, dots.NaSequence target_sequence, WHERE query_sequence.na_sequence_id = ba.query_na_sequence_id AND query_sequence.sequence_ontology_id = so.ontology_term_id + AND ba.target_na_sequence_id = target_sequence.na_sequence_id + AND target_sequence.taxon_id = :TAXON_ID AND so.name = 'EST' EXCEPT - SELECT blat_alignment_id FROM EstAlignmentGene) + SELECT blat_alignment_id FROM :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp) ; :CREATE_AND_POPULATE - - CREATE TABLE EstAlignmentGeneSummary AS - SELECT * FROM EstAlignmentNoGene + SELECT * FROM :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp UNION - SELECT * FROM EstAlignmentGene + SELECT * FROM :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp :DECLARE_PARTITION; -drop table EstAlignmentGene; -drop table EstAlignmentNoGene; \ No newline at end of file +drop table :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp; +drop table :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp; diff --git a/Model/lib/psql/webtables/MO/GeneAttributes.psql b/Model/lib/psql/webtables/MO/GeneAttributes.psql index 8c78e847c9..490b20f223 100644 --- a/Model/lib/psql/webtables/MO/GeneAttributes.psql +++ b/Model/lib/psql/webtables/MO/GeneAttributes.psql @@ -1,8 +1,7 @@ :CREATE_AND_POPULATE - - - CREATE TABLE :ORG_ABBREVGeneAttributes AS - SELECT DISTINCT project_id + SELECT DISTINCT ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev + , current_timestamp as modification_date , ta.gene_source_id AS source_id , gene_na_feature_id AS na_feature_id , na_sequence_id @@ -59,11 +58,12 @@ , gene_zoom_context_end as zoom_context_end , cast (null as numeric) as strain_count , ta.gene_locations as locations - FROM :ORG_ABBREVTranscriptAttributes ta + FROM :SCHEMA.TranscriptAttributes ta INNER JOIN ( SELECT gene_source_id, MIN(is_pseudo) AS is_pseudo, MIN(gene_product) AS product, 
substr(STRING_AGG(transcript_product, ',' order by transcript_product), 1, 240) as transcript_product - FROM :ORG_ABBREVTranscriptAttributes + FROM :SCHEMA.TranscriptAttributes + WHERE org_abbrev = ':ORG_ABBREV' GROUP BY gene_source_id ) aggregates ON ta.gene_source_id = aggregates.gene_source_id LEFT JOIN ( @@ -71,11 +71,12 @@ substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id, substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal FROM ( - SELECT distinct t.parent_id as na_feature_id, dr.primary_identifier as uniprot_id - FROM sres.DbRef dr, dots.DbRefNaFeature x, dots.Transcript t, + SELECT distinct t.gene_na_feature_id as na_feature_id, dr.primary_identifier as uniprot_id + FROM sres.DbRef dr, dots.DbRefNaFeature x, :SCHEMA.Transcriptattributes t, sres.ExternalDatabase d, sres.ExternalDatabaseRelease r WHERE dr.db_ref_id = x.DB_REF_ID - AND (x.na_feature_id = t.na_feature_id OR x.na_feature_id = t.parent_id) + AND t.org_abbrev = ':ORG_ABBREV' + AND (x.na_feature_id = t.na_feature_id OR x.na_feature_id = t.gene_na_feature_id) AND dr.external_database_release_id = r.external_database_release_id AND r.external_database_id = d.external_database_id AND (d.name like '%uniprot_dbxref_RSRC' @@ -86,28 +87,31 @@ GROUP BY na_feature_id ) uniprot ON ta.gene_na_feature_id = uniprot.na_feature_id LEFT JOIN :ORG_ABBREVGeneProduct gp ON ta.gene_source_id = gp.source_id + WHERE ta.org_abbrev = ':ORG_ABBREV' ORDER BY ta.gene_source_id - - :DECLARE_PARTITION; - CREATE TABLE :ORG_ABBREVSpeciesInfo as + CREATE unlogged TABLE :SCHEMA.:ORG_ABBREVSpeciesInfoTmp as SELECT genus_species, count(distinct organism) as strain_count - FROM :ORG_ABBREVGeneAttributes + FROM :SCHEMA.GeneAttributes + WHERE org_abbrev = ':ORG_ABBREV' GROUP BY genus_species ; - UPDATE :ORG_ABBREVGeneAttributes ga + UPDATE :SCHEMA.GeneAttributes ga SET strain_count = ( SELECT strain_count - FROM :ORG_ABBREVSpeciesInfo si + FROM 
:SCHEMA.:ORG_ABBREVSpeciesInfo si WHERE si.genus_species = ga.genus_species ) + WHERE org_abbrev = ':ORG_ABBREV' ; +drop table :SCHEMA.:ORG_ABBREVSpeciesInfo +; diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql index ac518347bf..fab9996a93 100644 --- a/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql +++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql @@ -1,8 +1,7 @@ :CREATE_AND_POPULATE - - - CREATE TABLE GeneCopyNumbers AS - SELECT DISTINCT ta.project_id + SELECT DISTINCT ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev + , current_timestamp as modification_date , ta.source_id , ta.gene_source_id , REGEXP_REPLACE(pan.name, '_[A-Za-z0-9]+ (.+)$', '') AS strain @@ -17,12 +16,14 @@ , io.output_pan_id FROM apidb.genecopynumber gcn , study.protocolappnode pan - , TranscriptAttributes ta - , PANIo io + , :SCHEMA.TranscriptAttributes ta + , :SCHEMA.PANIo io WHERE gcn.protocol_app_node_id = pan.protocol_app_node_id AND gcn.na_feature_id = ta.gene_na_feature_id AND gcn.protocol_app_node_id = io.output_pan_id AND (ta.gene_type = 'protein coding' or ta.gene_type = 'protein coding gene') + AND ta.org_abbrev = ':ORG_ABBREV' + AND io.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/GeneGoTable.psql b/Model/lib/psql/webtables/MO/GeneGoTable.psql index b83c188e7f..25fa844910 100644 --- a/Model/lib/psql/webtables/MO/GeneGoTable.psql +++ b/Model/lib/psql/webtables/MO/GeneGoTable.psql @@ -1,12 +1,13 @@ :CREATE_AND_POPULATE - - - CREATE TABLE GeneGoTable AS - SELECT source_id, project_id, go_id, - string_agg(transcript_source_id, ', ' order by transcript_source_id) as transcript_ids, - is_not, - max(go_term_name) as go_term_name, ontology, source, evidence_code, - reference, evidence_code_parameter, sort_key + SELECT source_id, + ':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, + current_timestamp as modification_date, + go_id, + 
string_agg(transcript_source_id, ', ' order by transcript_source_id) as transcript_ids, + is_not, + max(go_term_name) as go_term_name, ontology, source, evidence_code, + reference, evidence_code_parameter, sort_key FROM (SELECT DISTINCT ggt.gene_source_id as source_id, ga.project_id, replace(ggt.go_id, 'GO_', 'GO:') as go_id, ggt.transcript_source_id, @@ -14,8 +15,10 @@ ggt.go_term_name, ggt.ontology, ggt.source, ggt.evidence_code, ggt.reference, ggt.evidence_code_parameter, substr(ggt.ontology, 1, 1) || replace(ggt.go_id, 'GO_', 'GO:') as sort_key - FROM GeneGoTerms ggt, GeneAttributes ga + FROM :SCHEMA.GeneGoTerms ggt, :SCHEMA.GeneAttributes ga WHERE ggt.gene_source_id = ga.source_id + and ggt.org_abbrev = ':ORG_ABBREV' + and ga.org_abbrev = ':ORG_ABBREV' ) t GROUP BY source_id, project_id, go_id, is_not, ontology, source, evidence_code, reference, evidence_code_parameter, sort_key diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms.psql b/Model/lib/psql/webtables/MO/GeneGoTerms.psql index 4cd39239a2..87fec450b8 100644 --- a/Model/lib/psql/webtables/MO/GeneGoTerms.psql +++ b/Model/lib/psql/webtables/MO/GeneGoTerms.psql @@ -1,13 +1,11 @@ :CREATE_AND_POPULATE - - - create table :ORG_ABBREVGeneGoTerms as with root_term as (select ontology_term_id, cast(initcap(replace(name, '_', ' ')) as varchar(20)) as ontology from sres.OntologyTerm where source_id in ('GO_0008150','GO_0003674','GO_0005575')) - select gf.source_id as gene_source_id, t.source_id as transcript_source_id, taf.aa_sequence_id, + select ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, + gf.source_id as gene_source_id, t.source_id as transcript_source_id, taf.aa_sequence_id, cast (CASE ga.is_not WHEN 0 THEN '' WHEN 1 THEN 'not' ELSE ga.is_not::varchar END as varchar(3)) as is_not, ns.taxon_id, cast (gt.source_id as varchar(20)) as go_id, gt.ontology_term_id as go_term_id, rt.ontology, @@ -22,7 +20,7 @@ sres.OntologyTerm gt LEFT JOIN root_term rt ON 
gt.ancestor_term_id = rt.ontology_term_id where t.parent_id = gf.na_feature_id and gf.na_sequence_id = ns.na_sequence_id - and (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0) + and ns.taxon_id = :TAXON_ID and t.na_feature_id = taf.na_feature_id and taf.aa_sequence_id = ga.row_id and ga.table_id = (select table_id diff --git a/Model/lib/psql/webtables/MO/GeneId.psql b/Model/lib/psql/webtables/MO/GeneId.psql index 41e89c83f0..7feaf7ebc2 100644 --- a/Model/lib/psql/webtables/MO/GeneId.psql +++ b/Model/lib/psql/webtables/MO/GeneId.psql @@ -1,14 +1,29 @@ -:CREATE_AND_POPULATE +create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureTmp as +(select gf.na_feature_id + , gf.na_sequence_id + , gf.external_database_release_id + , gf.is_predicted + , gf.source_id + from dots.genefeature gf + , dots.nasequence nas + where gf.na_sequence_id = nas.na_sequence_id + and nas.taxon_id = :TAXON_ID +) +; - - CREATE TABLE :ORG_ABBREVGeneId AS +create index :SCHEMA.:ORG_ABBREVGeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id) +; + + +:CREATE_AND_POPULATE SELECT substr(mapping.id, 1, 100) as id, mapping.gene, cast (0 as NUMERIC(1)) as unique_mapping, SUBSTR(string_agg(distinct union_member,'; ' order by union_member), 1, 100) as union_member, - SUBSTR(string_agg(distinct database_name,'; ' order by database_name), 1, 200) as database_name + SUBSTR(string_agg(distinct database_name,'; ' order by database_name), 1, 200) as database_name, + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM (SELECT substr(t.protein_id, 1, nullif(position('.' 
IN t.protein_id) - 1, -1)) AS id, gf.source_id AS gene, 'Transcript.protein_id before dot' as union_member, ed.name as database_name /* dots.Transcript.protein_id, trimmed at period */ - FROM dots.Transcript t, dots.GeneFeature gf, + FROM dots.Transcript t, :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr WHERE t.parent_id = gf.na_feature_id AND substr(t.protein_id, 1, nullif(position('.' IN t.protein_id) - 1, -1)) IS NOT NULL @@ -18,7 +33,7 @@ SELECT t.protein_id AS id, gf.source_id AS gene, 'Transcript.protein_id' as union_member, ed.name as database_name /* dots.Transcript.protein_id */ - FROM dots.Transcript t, dots.GeneFeature gf, + FROM dots.Transcript t, :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr WHERE t.parent_id = gf.na_feature_id AND t.protein_id IS NOT NULL @@ -28,7 +43,7 @@ SELECT dr.primary_identifier AS id, gf.source_id AS gene, 'DbRef.primary_identifier' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */ - FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf, + FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed WHERE dr.primary_identifier IS NOT NULL @@ -46,7 +61,7 @@ SELECT dr.primary_identifier AS id, gf.source_id AS gene, 'DbRef.primary_identifier on Transcript' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */ - FROM dots.GeneFeature gf, dots.Transcript t, dots.DbRefNaFeature drnf, + FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.Transcript t, dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed WHERE dr.primary_identifier IS NOT NULL @@ -64,7 +79,7 @@ SELECT dr.primary_identifier AS id, gf.source_id AS gene, 'DbRef.primary_identifier on Gene' as union_member, ed.name as database_name - FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf, + FROM 
:SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed WHERE dr.primary_identifier IS NOT NULL @@ -77,7 +92,7 @@ SELECT dr.primary_identifier AS id, gf.source_id AS gene, 'VectorBase alternate names' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier */ - FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf, + FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed WHERE dr.primary_identifier IS NOT NULL @@ -91,7 +106,7 @@ SELECT dr.primary_identifier AS id, gf.source_id AS gene, 'synonym' as union_member, ed.name as database_name - FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf, + FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed WHERE dr.primary_identifier IS NOT NULL @@ -104,7 +119,7 @@ SELECT dr.secondary_identifier AS id, gf.source_id AS gene, 'DbRef.secondary_identifier' as union_member, ed.name as database_name /* sres.DbRef.secondary_identifier */ - FROM dots.GeneFeature gf, dots.DbRefNaFeature drnf, + FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.DbRefNaFeature drnf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed WHERE dr.secondary_identifier IS NOT NULL @@ -123,7 +138,7 @@ SELECT dr.primary_identifier AS id, gf.source_id AS gene, 'genbank DbRef.primary_identifier' as union_member, ed.name as database_name /* sres.DbRef.primary_identifier for Genbank records */ - FROM dots.GeneFeature gf, dots.Transcript t, dots.DbRefNaSequence drns, + FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.Transcript t, dots.DbRefNaSequence drns, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed WHERE gf.na_feature_id = t.parent_id @@ -135,12 +150,15 @@ UNION SELECT pred_loc.feature_source_id AS id, gene_loc.feature_source_id AS gene, - 
'overlapping predicted gene source_id' as union_member, ed.name as database_name /* dots.GeneFeature.source_id for predicted genes that overlap */ + 'overlapping predicted gene source_id' as union_member, ed.name as database_name /* dots.genefeature.source_id for predicted genes that overlap */ FROM apidb.FeatureLocation gene_loc, apidb.FeatureLocation pred_loc, - sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed + sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed, + dots.nasequence nas WHERE pred_loc.feature_type = 'GenePrediction' AND gene_loc.feature_type = 'GeneFeature' AND pred_loc.na_sequence_id = gene_loc.na_sequence_id + AND gene_loc.na_sequence_id = nas.na_sequence_id + and nas.taxon_id = :TAXON_ID AND gene_loc.start_min <= pred_loc.end_max AND gene_loc.end_max >= pred_loc.start_min AND pred_loc.is_reversed = gene_loc.is_reversed @@ -149,7 +167,7 @@ UNION SELECT ng.name AS id, gf.source_id AS gene, 'NaGene' as union_member, ed.name as database_name /* dots.NaGene.name */ - FROM dots.GeneFeature gf, dots.NaFeatureNaGene nfng, dots.NaGene ng, + FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.NaFeatureNaGene nfng, dots.NaGene ng, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed WHERE gf.na_feature_id = nfng.na_feature_id AND ng.na_gene_id = nfng.na_gene_id @@ -158,14 +176,14 @@ UNION SELECT source_id AS id, source_id AS gene, 'same ID' as union_member, ed.name as database_name /* same ID (reflexive mapping) */ - FROM dots.GeneFeature gf, + FROM :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed WHERE gf.external_database_release_id = edr.external_database_release_id AND edr.external_database_id = ed.external_database_id UNION SELECT n.name AS id, gf.source_id AS gene, 'gene name' as union_member, d.name as database_name -- apidb.GeneFeatureName.name - from dots.genefeature gf, sres.ExternalDatabaseRelease r, sres.ExternalDatabase d, + from :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, 
sres.ExternalDatabaseRelease r, sres.ExternalDatabase d, ( select na_feature_id, name from apidb.GeneFeatureName where is_preferred = 1 @@ -186,7 +204,7 @@ gf.source_id as gene, 'AA feature DbRef primary ID' as union_member, ed.name as database_name /* DbRef.primary_identifier mapped through DbRefAaFeature */ - from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf, + from :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.Transcript t, dots.TranslatedAaFeature taf, dots.DbRefAaFeature draf, sres.DbRef dr, sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed where gf.na_feature_id = t.parent_id @@ -200,40 +218,39 @@ 'SMART', 'SUPERFAMILY', 'TIGRFAM', 'CDD','HAMAP','HMMPANTHER', 'PRINTS','SCANPROSITE','SFLD') ) mapping, - dots.GeneFeature gf, dots.NaSequence ns + :SCHEMA.:ORG_ABBREVGeneFeatureTmp gf, dots.NaSequence ns WHERE mapping.gene = gf.source_id AND gf.na_sequence_id = ns.na_sequence_id - AND (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0) AND (gf.is_predicted != 1 OR gf.is_predicted is null) GROUP BY mapping.id, mapping.gene - - :DECLARE_PARTITION; - INSERT INTO :ORG_ABBREVGeneId - (id, gene, unique_mapping, union_member, database_name) + INSERT INTO :SCHEMA.GeneId + (id, gene, unique_mapping, union_member, database_name, project_id, org_abbrev, modification_date) WITH munge AS (SELECT DISTINCT regexp_replace(id, '\.\d\d?$', '') as id, gene, unique_mapping, union_member, database_name - FROM :ORG_ABBREVGeneId + FROM :SCHEMA.GeneId + WHERE org_abbrev = ':ORG_ABBREV' -- CHECK AND FIX --WHERE regexp_like(id, '(.*)\.\d\d?$') ) - SELECT id, gene, 0 as unique_mapping, 'base ID' as union_member, database_name + SELECT id, gene, 0 as unique_mapping, 'base ID' as union_member, database_name, + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM munge - WHERE id NOT IN (SELECT id FROM :ORG_ABBREVGeneId ) + WHERE id NOT IN (SELECT id FROM :SCHEMA.GeneId where org_abbrev = 
':ORG_ABBREV') ; - CREATE UNLOGGED TABLE :ORG_ABBREVOneGeneIds (lower_id) AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id) AS SELECT lower_id FROM (SELECT DISTINCT lower(id) as lower_id, gene - FROM :ORG_ABBREVGeneId + FROM :SCHEMA.GeneId where org_abbrev = ':ORG_ABBREV' ) t GROUP BY lower_id HAVING count(*) = 1 @@ -242,24 +259,28 @@ - CREATE UNIQUE INDEX gix _pk ON :ORG_ABBREVOneGeneIds (lower_id) - + CREATE UNIQUE INDEX :ORG_ABBREV_gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id) ; - UPDATE :ORG_ABBREVGeneId + UPDATE :SCHEMA.GeneId SET unique_mapping = 1 WHERE id = gene - + and org_abbrev = ':ORG_ABBREV' ; - UPDATE :ORG_ABBREVGeneId + UPDATE :SCHEMA.GeneId SET unique_mapping = 1 - WHERE lower(id) IN (select lower_id from :ORG_ABBREVOneGeneIds) - + WHERE lower(id) IN (select lower_id from :SCHEMA.:ORG_ABBREVOneGeneIdsTmp) + and org_abbrev = ':ORG_ABBREV' ; + +drop table :SCHEMA.:ORG_ABBREVOneGeneIdsTmp +; +drop table :SCHEMA.:ORG_ABBREVGeneFeatureTmp +; diff --git a/Model/lib/psql/webtables/MO/GeneLocations.psql b/Model/lib/psql/webtables/MO/GeneLocations.psql index aae59b60e7..ad129a3f1b 100644 --- a/Model/lib/psql/webtables/MO/GeneLocations.psql +++ b/Model/lib/psql/webtables/MO/GeneLocations.psql @@ -1,9 +1,7 @@ :CREATE_AND_POPULATE - - - CREATE TABLE :ORG_ABBREVGeneLocations AS SELECT source_id, - string_agg(location, '; ' order by is_top_level desc) as locations + string_agg(location, '; ' order by is_top_level desc) as locations, + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM (SELECT fl.feature_source_id as source_id, fl.is_top_level, fl.sequence_source_id || ':' || trim(to_char(fl.start_min,'999,999,999')) || '..' 
@@ -13,7 +11,7 @@ FROM apidb.FeatureLocation fl, dots.NaSequence ns WHERE fl.feature_type = 'GeneFeature' AND fl.na_sequence_id = ns.na_sequence_id - AND (ns.taxon_id::varchar = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0) + AND ns.taxon_id = :TAXON_ID ) t GROUP BY source_id diff --git a/Model/lib/psql/webtables/MO/GeneModelDump.psql b/Model/lib/psql/webtables/MO/GeneModelDump.psql index 698e4fc44f..2ee1bcdf1e 100644 --- a/Model/lib/psql/webtables/MO/GeneModelDump.psql +++ b/Model/lib/psql/webtables/MO/GeneModelDump.psql @@ -1,27 +1,28 @@ :CREATE_AND_POPULATE - - - CREATE TABLE GeneModelDump as SELECT source_id, project_id, sequence_id, gm_start,gm_end, type, is_reversed, - string_agg(transcript_id, ',' ORDER BY transcript_id) AS transcript_ids + string_agg(transcript_id, ',' ORDER BY transcript_id) AS transcript_ids, + ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM ( SELECT distinct ta.source_id as transcript_id, ta.gene_source_id as source_id, ta.project_id, ta.sequence_id,gm.start_min as gm_start, gm.end_max as gm_end, gm.type, gl.is_reversed FROM apidb.FeatureLocation gl, dots.NaSequence s, - TranscriptAttributes ta, + :SCHEMA.TranscriptAttributes ta, ( SELECT CASE el.feature_type WHEN 'ExonFeature' THEN 'Exon' ELSE el.feature_type END as type, el.parent_id as na_feature_id, el.start_min as start_min, el.end_max as end_max - FROM apidb.FeatureLocation el + FROM apidb.FeatureLocation el, dots.nasequence nas WHERE el.feature_type in ('ExonFeature','five_prime_UTR', 'three_prime_UTR','CDS','Intron') AND el.is_top_level = 1 + AND nas.na_sequence_id = el.na_sequence_id + and nas.taxon_id = :TAXON_ID ) gm WHERE gm.na_feature_id = ta.na_feature_id AND s.na_sequence_id = gl.na_sequence_id AND ta.na_feature_id = gl.na_feature_id AND gl.is_top_level = 1 + AND ta.org_abbrev = ':ORG_ABBREV' ) t GROUP BY source_id, project_id, sequence_id, gm_start, gm_end, type, is_reversed diff --git a/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql
b/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql index c691f5b8ce..02e0664c2e 100644 --- a/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql +++ b/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql @@ -1,13 +1,12 @@ :CREATE_AND_POPULATE - - - CREATE TABLE GeneSummaryFilter AS - SELECT CAST(filter_name AS VARCHAR(80)) AS filter_name + SELECT CAST(filter_name AS VARCHAR(80)) AS filter_name,':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM (SELECT species as filter_name - FROM GeneAttributes + FROM :SCHEMA.GeneAttributes + WHERE org_abbrev = ':ORG_ABBREV' UNION SELECT organism as filter_name - FROM GeneAttributes) t + FROM :SCHEMA.GeneAttributes + WHERE org_abbrev = ':ORG_ABBREV') t :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/GoTermSummary.psql b/Model/lib/psql/webtables/MO/GoTermSummary.psql index 7757364b49..a233539f0c 100644 --- a/Model/lib/psql/webtables/MO/GoTermSummary.psql +++ b/Model/lib/psql/webtables/MO/GoTermSummary.psql @@ -1,8 +1,6 @@ :CREATE_AND_POPULATE - - - CREATE TABLE :ORG_ABBREVGoTermSummary AS - SELECT ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id, + SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, + ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id, ggt.taxon_id, ggt.is_not, replace(ggt.go_id, '_', ':') as go_id, ggt.go_term_id, ggt.ontology, replace(ggt.go_term_name, '_',' ') as go_term_name, ggt.source, @@ -14,13 +12,14 @@ when gs.ontology_term_id is null then 0 else 1 end as is_go_slim - FROM :ORG_ABBREVGeneGoTerms ggt - LEFT JOIN :ORG_ABBREVOntologyLevels ol ON ggt.go_term_id = ol.ontology_term_id + FROM :SCHEMA.GeneGoTerms ggt + LEFT JOIN :SCHEMA.OntologyLevels ol ON ggt.go_term_id = ol.ontology_term_id LEFT JOIN ( SELECT distinct ontology_term_id FROM apidb.GoSubset WHERE go_subset_term = 'goslim_generic' ) gs ON ggt.go_term_id = gs.ontology_term_id + WHERE ggt.org_abbrev = 
':ORG_ABBREV' UNION SELECT ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id, ggt.taxon_id, ggt.is_not, @@ -36,10 +35,10 @@ when gs.ontology_term_id is null then 0 else 1 end as is_go_slim - FROM :ORG_ABBREVGeneGoTerms ggt, sres.OntologyRelationship orel, + FROM :SCHEMA.GeneGoTerms ggt, sres.OntologyRelationship orel, sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr, sres.OntologyTerm ot - LEFT JOIN :ORG_ABBREVOntologyLevels ol ON ot.ontology_term_id = ol.ontology_term_id + LEFT JOIN :SCHEMA.OntologyLevels ol ON ot.ontology_term_id = ol.ontology_term_id LEFT JOIN ( SELECT distinct ontology_term_id FROM apidb.GoSubset @@ -50,6 +49,7 @@ AND edr.external_database_release_id = ot.external_database_release_id AND edr.external_database_id = ed.external_database_id AND ed.name ='GO_RSRC' + AND ggt.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords.psql b/Model/lib/psql/webtables/MO/IntronUtrCoords.psql index ac598bbc08..cfed91dac6 100644 --- a/Model/lib/psql/webtables/MO/IntronUtrCoords.psql +++ b/Model/lib/psql/webtables/MO/IntronUtrCoords.psql @@ -1,9 +1,7 @@ :CREATE_AND_POPULATE - - - CREATE TABLE IntronUtrCoords AS SELECT na_feature_id, source_id, - '[' || regexp_replace(string_agg(text,',' ORDER BY start_min), '.quot;', '"' ) || ']' AS gen_rel_intron_utr_coords + '[' || regexp_replace(string_agg(text,',' ORDER BY start_min), '.quot;', '"' ) || ']' AS gen_rel_intron_utr_coords, + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM ( SELECT na_feature_id, source_id, start_min, '["' || feature_type || '",' || start_min || ',' || end_max || ']' AS text @@ -20,16 +18,15 @@ ELSE fl.end_max - tl.start_min + 1 END AS end_max FROM - apidb.TranscriptLocation tl, apidb.FeatureLocation fl + apidb.TranscriptLocation tl, apidb.FeatureLocation fl, dots.nasequence nas WHERE tl.na_feature_id = fl.parent_id + AND fl.na_sequence_id = nas.na_sequence_id + AND
nas.taxon_id = :TAXON_ID AND fl.feature_type in('UTR', 'Intron') AND tl.is_top_level = 1 AND fl.is_top_level = 1 ) t1 ) t2 GROUP BY na_feature_id, source_id - - :DECLARE_PARTITION; - diff --git a/Model/lib/psql/webtables/MO/TFBSGene.psql b/Model/lib/psql/webtables/MO/TFBSGene.psql index 3a7103d8ac..882cf31ab9 100644 --- a/Model/lib/psql/webtables/MO/TFBSGene.psql +++ b/Model/lib/psql/webtables/MO/TFBSGene.psql @@ -1,8 +1,8 @@ :CREATE_AND_POPULATE - - - CREATE TABLE TFBSGene AS SELECT DISTINCT + ':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, + current_timestamp as modification_date, ga.source_id as gene_source_id, ga.organism as organism, ga.genus_species as species, @@ -35,9 +35,10 @@ aef.* FROM dots.BindingSiteFeature aef, apidb.FeatureLocation arrloc, - GeneAttributes ga + :SCHEMA.GeneAttributes ga WHERE aef.na_feature_id = arrloc.na_feature_id - AND arrloc.na_sequence_id = ga.na_sequence_id + AND arrloc.na_sequence_id = ga.na_sequence_id + AND ga.org_abbrev = ':ORG_ABBREV' AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000) or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) ) diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway.psql b/Model/lib/psql/webtables/MO/TranscriptPathway.psql index 4b1c95b668..1490621a67 100644 --- a/Model/lib/psql/webtables/MO/TranscriptPathway.psql +++ b/Model/lib/psql/webtables/MO/TranscriptPathway.psql @@ -1,42 +1,14 @@ :CREATE_AND_POPULATE - - - CREATE TABLE TranscriptPathway ( - SOURCE_ID VARCHAR(80), - GENE_SOURCE_ID VARCHAR(80), - PROJECT_ID VARCHAR(20), - PATHWAY_SOURCE_ID VARCHAR(50), - PATHWAY_NAME VARCHAR(150), - EC_NUMBER_GENE VARCHAR(16), - WILDCARD_COUNT_GENE NUMERIC, - EC_NUMBER_PATHWAY VARCHAR(16), - WILDCARD_COUNT_PATHWAY NUMERIC, - EXACT_MATCH NUMERIC, - COMPLETE_EC NUMERIC, - PATHWAY_ID NUMERIC(12,0), - PATHWAY_SOURCE VARCHAR(200), - EXTERNAL_DATABASE_RELEASE_ID
NUMERIC(10,0) - ) - - -:DECLARE_PARTITION; - - - - DO $$ - DECLARE - idlist RECORD; - BEGIN - FOR idlist IN ( SELECT DISTINCT organism FROM GeneAttributes ) - LOOP - INSERT INTO TranscriptPathway WITH transcript_ec AS ( SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4, -- CHECK AND FIX -- regexp_count( ec.ec_number, '-') as wildcard_count count( ec.ec_number) as wildcard_count FROM sres.EnzymeClass ec - WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM dots.AaSequenceEnzymeClass) + WHERE enzyme_class_id IN (SELECT aseqEc.enzyme_class_id + FROM dots.AaSequenceEnzymeClass aseqEc, dots.aasequence seq + WHERE aseqEc.aa_sequence_id = seq.aa_sequence_id + AND seq.taxon_id = :TAXON_ID) GROUP BY ec.enzyme_class_id ), pathway_node_ec AS ( @@ -86,27 +58,23 @@ , pa.pathway_id , pa.pathway_source , p.external_database_release_id - FROM PathwayAttributes pa + FROM :SCHEMA.PathwayAttributes pa , sres.pathway p , pathway_node_ec pec , ec_match , dots.AaSequenceEnzymeClass asec - , TranscriptAttributes ga - WHERE ga.organism = idlist.organism + , :SCHEMA.TranscriptAttributes ga + WHERE ga.org_abbrev = ':ORG_ABBREV' AND pa.pathway_id = pec.pathway_id AND p.pathway_id = pa.pathway_id AND pec.enzyme_class_id = ec_match.pathway_enzyme_class_id AND asec.enzyme_class_id = ec_match.transcript_enzyme_class_id AND ga.aa_sequence_id = asec.aa_sequence_id - AND ( - (ga.orthomcl_name IS NULL AND asec.evidence_code != 'OrthoMCLDerived') - OR ga.orthomcl_name IS NOT NULL - ) - ; - commit; - END LOOP; - END; - $$ LANGUAGE PLPGSQL; - + -- TODO: Need new downstream table to bring in OrthoMCLDerived EC associations + -- AND ( + -- (ga.orthomcl_name IS NULL AND asec.evidence_code != 'OrthoMCLDerived') + -- OR ga.orthomcl_name IS NOT NULL + -- ) ; +:DECLARE_PARTITION; diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt index 6f0b461f33..dfcfa55e0e 100644 --- 
a/Model/lib/xml/tuningManager/tablePruning.txt +++ b/Model/lib/xml/tuningManager/tablePruning.txt @@ -3,11 +3,11 @@ MG ?? ?? ?? -?? +K ?? ?? -?? -?? +K +MO R ?? ?? @@ -15,7 +15,7 @@ R ?? ?? ?? -?? +K ?? ?? MO diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index 6373429173..4b7c8e9056 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -36,54 +36,67 @@ - [X] ProteinAttributes.psql - [ ] TranscriptAttributes_ix.psql - [ ] TranscriptAttributes.psql - - [s] CodingSequence_ix.psql - - [s] CodingSequence.psql - - [ ] IntronUtrCoords_ix.psql - - [ ] IntronUtrCoords.psql - - [s] TranscriptCenDistance_ix.psql - - [s] TranscriptCenDistance.psql + - [X] CodingSequence_ix.psql + - [X] CodingSequence.psql + - [X] IntronUtrCoords_ix.psql + - [X] IntronUtrCoords.psql + - [X] TranscriptCenDistance_ix.psql + - [X] TranscriptCenDistance.psql - [ ] TranscriptPathway_ix.psql - [ ] TranscriptPathway.psql - - [s] TranscriptSequence_ix.psql - - [s] TranscriptSequence.psql - - [s] ChIPchipTranscript_ix.psql - - [s] ChIPchipTranscript.psql + - This may need to move to comparative genomics because we need the OrthoDerived EC mappings + - [X] TranscriptSequence_ix.psql + - [X] TranscriptSequence.psql + - [X] ChIPchipTranscript_ix.psql + - [X] ChIPchipTranscript.psql - Gene - - [ ] GeneId_ix.psql - - [ ] GeneId.psql - - [ ] GeneAttributes_ix.psql - - [ ] GeneAttributes.psql - - [ ] GeneCopyNumbers_ix.psql - - [ ] GeneCopyNumbers.psql - - [ ] GeneGoTable_ix.psql - - [ ] GeneGoTable.psql - - [ ] GeneGoTerms_ix.psql - - [ ] GeneGoTerms.psql - - [ ] GeneLocations_ix.psql - - [ ] GeneLocations.psql - - [ ] GeneModelDump_ix.psql - - [ ] GeneModelDump.psql - - [ ] GeneSummaryFilter_ix.psql - - [ ] GeneSummaryFilter.psql - - [ ] TFBSGene_ix.psql - - [ ] TFBSGene.psql + - [X] GeneId_ix.psql + - [X] GeneId.psql + - [X] GeneAttributes_ix.psql + - [X] GeneAttributes.psql + - [X] 
GeneCopyNumbers_ix.psql + - [X] GeneCopyNumbers.psql + - [X] GeneGoTable_ix.psql + - [X] GeneGoTable.psql + - [X] GeneGoTerms_ix.psql + - [X] GeneGoTerms.psql + - [X] GeneLocations_ix.psql + - [X] GeneLocations.psql + - [X] GeneModelDump_ix.psql + - [X] GeneModelDump.psql + - [X] GeneSummaryFilter_ix.psql + - [X] GeneSummaryFilter.psql + - [X] TFBSGene_ix.psql + - [X] TFBSGene.psql - [ ] PathwayNodeGene_ix.psql + - This may need to move to comparative genomics because we need the OrthoDerived EC mappings - [ ] PathwayNodeGene.psql + - This may need to move to comparative genomics because we need the OrthoDerived EC mappings - [ ] PathwaysGeneTable_ix.psql + - This may need to move to comparative genomics because we need the OrthoDerived EC mappings - [ ] PathwaysGeneTable.psql - - [ ] GoTermSummary_ix.psql - - [ ] GoTermSummary.psql - - [ ] EqtlSpan_ix.psql - - [ ] EqtlSpan.psql + - This may need to move to comparative genomics because we need the OrthoDerived EC mappings + - [X] GoTermSummary_ix.psql + - [X] GoTermSummary.psql + - [X] EqtlSpan_ix.psql + - [X] EqtlSpan.psql - EST - - [ ] EstAlignmentGeneSummary_ix.psql - - [ ] EstAlignmentGeneSummary.psql - [ ] EstAttributes_ix.psql + - move to comparative + - join to apidb.organism and filter by "is_reference_strain" - [ ] EstAttributes.psql + - move to comparative + - join to apidb.organism and filter by "is_reference_strain" - [ ] EstSequence_ix.psql + - move to comparative + - join to apidb.organism and filter by "is_reference_strain" - [ ] EstSequence.psql + - move to comparative + - join to apidb.organism and filter by "is_reference_strain" + - [X] EstAlignmentGeneSummary_ix.psql + - [X] EstAlignmentGeneSummary.psql - Dataset / Other - [ ] DatasetExampleSourceId_ix.psql @@ -111,4 +124,6 @@ - [ ] NameMappingGIJ_ix.psql - [ ] NameMappingGIJ.psql - [ ] GeneMaxIntronGIJ_ix.psql + - should be aux table and dropped - [ ] GeneMaxIntronGIJ.psql + - should be aux table and dropped From 
706a5611763e66f9ea1c42a9745d02fb17c9ef55 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 13 May 2025 16:04:56 -0400 Subject: [PATCH 011/112] convert some more MOs --- .../lib/psql/webtables/MO/ChrCopyNumbers.psql | 7 +- .../webtables/MO/DatasetExampleSourceId.psql | 11 +- .../psql/webtables/MO/OrganismAttributes.psql | 100 +++++++++--------- Model/lib/xml/tuningManager/webtables.org | 8 +- 4 files changed, 62 insertions(+), 64 deletions(-) diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql index d22fda9dd9..8b4b6d5ca1 100644 --- a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql +++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql @@ -1,15 +1,12 @@ :CREATE_AND_POPULATE - - - CREATE TABLE ChrCopyNumbers AS SELECT DISTINCT ta.na_sequence_id , ta.chromosome , ccn.chr_copy_number AS ploidy , io.input_pan_id , io.output_pan_id FROM apidb.ChrCopyNumber ccn - , TranscriptAttributes ta - , PANIo io + , :SCHEMA.TranscriptAttributes ta + , :SCHEMA.PANIo io WHERE ta.na_sequence_id = ccn.na_sequence_id AND ta.chromosome IS NOT NULL AND ccn.protocol_app_node_id = io.output_pan_id diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql index 9d7b830c6d..3d10c6319c 100644 --- a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql +++ b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql @@ -1,20 +1,19 @@ :CREATE_AND_POPULATE - - - CREATE TABLE DatasetExampleSourceId AS WITH profiles AS ( SELECT p.source_id, - ga.project_id, ga.sequence_id, d.name, row_number() over(partition by d.name order by ga.chromosome_order_num, p.profile_as_string desc) as rn FROM Profile p INNER JOIN sres.ExternalDatabase d ON p.dataset_name = d.name - LEFT JOIN GeneAttributes ga ON p.source_id = ga.source_id + LEFT JOIN :SCHEMA.GeneAttributes ga ON p.source_id = ga.source_id WHERE p.profile_as_string is not null + and ga.org_abbrev = ':ORG_ABBREV' ) - 
SELECT p.source_id as example_source_id, p.project_id, p.sequence_id, p.name as dataset + SELECT + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, + p.source_id as example_source_id, p.sequence_id, p.name as dataset FROM profiles p WHERE p.rn = 1 diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/MO/OrganismAttributes.psql index 1a38fed293..9e49fb17a9 100644 --- a/Model/lib/psql/webtables/MO/OrganismAttributes.psql +++ b/Model/lib/psql/webtables/MO/OrganismAttributes.psql @@ -1,5 +1,3 @@ - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVDataSourceCount AS SELECT taxon_id, @@ -13,34 +11,37 @@ FROM dots.externalNAsequence enas, SRES.ontologyterm ot WHERE enas.sequence_ontology_id = ot.ontology_term_id AND ot.name in( 'mitochondrial_chromosome','apicoplast_chromosome') - GROUP BY enas.taxon_id + and enas.taxon_id = :TAXON_ID + GROUP BY enas.taxon_id UNION SELECT distinct ds.taxon_id, 'HTSIsolate' AS stype, 1 AS num FROM apidb.DataSource ds WHERE ds.type = 'isolates' AND ds.subtype = 'HTS_SNP' - GROUP BY taxon_id + AND ds.taxon_id = :TAXON_ID + GROUP BY taxon_id UNION SELECT distinct ds.taxon_id, 'Popset' AS stype, 1 AS num FROM apidb.DataSource ds WHERE ds.subtype = 'sequenceing_types' + AND ds.taxon_id = :TAXON_ID GROUP BY taxon_id UNION SELECT distinct ds.taxon_id, 'Epitope' AS stype, 1 AS num FROM apidb.DataSource ds WHERE ds.type = 'epitope' + AND ds.taxon_id = :TAXON_ID GROUP BY taxon_id UNION SELECT distinct ds.taxon_id, 'Array' AS stype, 1 AS num FROM apidb.DataSource ds WHERE ds.type = 'transcript_expression' AND ds.subtype = 'array' + AND ds.taxon_id = :TAXON_ID GROUP BY taxon_id ) t GROUP BY taxon_id ; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVOrganismCentromere AS SELECT distinct s.taxon_id, @@ -51,11 +52,10 @@ WHERE ot.ontology_term_id = f.sequence_ontology_id AND ot.name='centromere' AND f.na_sequence_id = s.na_sequence_id - GROUP BY s.taxon_id + AND s.taxon_id = 
:TAXON_ID + GROUP BY s.taxon_id ; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVSequenceCount AS SELECT @@ -65,21 +65,21 @@ max(CASE WHEN sequence_type = 'chromosome' THEN num ELSE null END) as chrom_num FROM ( SELECT count(*) as num, sequence_type, taxon_id - FROM GenomicSeqAttributes + FROM :SCHEMA.GenomicSeqAttributes WHERE is_top_level =1 - GROUP BY taxon_id, sequence_type + and org_abbrev = ':ORG_ABBREV' + GROUP BY taxon_id, sequence_type ) t GROUP BY taxon_id ; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVCommunityCount AS -- SELECT taxon_id, count(*) as communityCount -- TODO: addd this back select taxon_id, 0 as communityCount - FROM GeneAttributes + FROM :SCHEMA.GeneAttributes + where org_abbrev = ':ORG_ABBREV' --WHERE --(source_id, project_id) IN ( --SELECT distinct stable_id, project_name @@ -115,7 +115,7 @@ else '' end)) as geneArrayCount FROM Profile p - RIGHT OUTER JOIN GeneAttributes ga ON ga.source_id = p.source_id + RIGHT OUTER JOIN :SCHEMA.GeneAttributes ga ON ga.source_id = p.source_id WHERE ga.org_abbrev = ':ORG_ABBREV' GROUP BY ga.taxon_id ; @@ -134,6 +134,7 @@ AND t.table_id = s.query_table_id AND t.name = 'ExternalNASequence' AND s.pvalue_exp <= -10 + and nas.taxon_id = :TAXON_ID ) sim LEFT JOIN (SELECT i.source_id, seq.source_id as sequence_id FROM dots.similarity s, PopsetAttributes i, GeneAttributes g, @@ -146,6 +147,7 @@ AND s.max_subject_end >= g.start_min AND g.na_sequence_id = seq.na_sequence_id AND t.name = 'ExternalNASequence' + and seq.taxon_id = :TAXON_ID ) gene ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id) GROUP BY sim.taxon_id @@ -160,7 +162,7 @@ genomestat.database_version, genomestat.ncbi_tax_id, genomestat.Megabps, - coalesce(snpCount.ct,0) as snpCount, + -- coalesce(snpCount.ct,0) as snpCount, coalesce(count(distinct ga.source_id),0) as geneCount, coalesce(count(distinct case when ga.is_pseudo =1 then ga.source_id else '' end),0) as pseudoGeneCount, coalesce(count(distinct case when (ga.gene_type
='protein coding' or ga.gene_type ='protein coding gene') then ga.source_id else '' end),0) as codingGeneCount, @@ -185,32 +187,32 @@ then mss.aa_sequence_id else NULL end)),0) proteomicsCount, - coalesce(count (distinct (case when ga.is_deprecated = 0 - then est.source_id - else NULL - end)),0) estCount, + -- coalesce(count (distinct (case when ga.is_deprecated = 0 + -- then est.source_id + -- else NULL + -- end)),0) estCount, coalesce(count (distinct (case when (ga.is_deprecated = 0 and ta.ec_numbers is not null) then ga.source_id else NULL end)),0) ecNumberCount FROM GeneAttributes ga LEFT OUTER JOIN apidb.phylogeneticprofile pp on ga.source_id = pp.source_id - LEFT OUTER JOIN gotermsummary gts on ga.source_id = gts.gene_source_id - LEFT OUTER JOIN TFBSGene tfbs on ga.source_id = tfbs.gene_source_id - LEFT OUTER JOIN TranscriptAttributes ta on ta.gene_source_id = ga.source_id + LEFT OUTER JOIN :SCHEMA.gotermsummary gts on ga.source_id = gts.gene_source_id and gts.org_abbrev = ':ORG_ABBREV' + LEFT OUTER JOIN :SCHEMA.TFBSGene tfbs on ga.source_id = tfbs.gene_source_id and tfbs.org_abbrev = ':ORG_ABBREV' + LEFT OUTER JOIN :SCHEMA.TranscriptAttributes ta on ta.gene_source_id = ga.source_id and ta.org_abbrev = ':ORG_ABBREV' LEFT OUTER JOIN apidb.MassSpecSummary mss on ta.aa_sequence_id = mss.aa_sequence_id - LEFT OUTER JOIN chipchipTranscript cct on ga.source_id = cct.gene_source_id - LEFT OUTER JOIN ( - SELECT distinct s.gene as source_id - FROM EstAlignmentGeneSummary s, EstAttributes e - WHERE s.est_gene_overlap_length >= 100 - AND s.is_best_alignment in (1) - AND s.percent_est_bases_aligned >= 20 - AND s.percent_identity >= 90 - AND e.best_alignment_count <= 1 - AND e.source_id = s.accession - GROUP by s.gene HAVING count(*) >= 1 - ) est ON ga.source_id = est.source_id + LEFT OUTER JOIN :SCHEMA.chipchipTranscript cct on ga.source_id = cct.gene_source_id and cct.org_abbrev = ':ORG_ABBREV' + -- LEFT OUTER JOIN ( + -- SELECT distinct s.gene as source_id + -- FROM 
EstAlignmentGeneSummary s, EstAttributes e + -- WHERE s.est_gene_overlap_length >= 100 + -- AND s.is_best_alignment in (1) + -- AND s.percent_est_bases_aligned >= 20 + -- AND s.percent_identity >= 90 + -- AND e.best_alignment_count <= 1 + -- AND e.source_id = s.accession + -- GROUP by s.gene HAVING count(*) >= 1 + -- ) est ON ga.source_id = est.source_id RIGHT OUTER JOIN ( SELECT project_id, taxon_id, max(database_version) as database_version, @@ -218,30 +220,28 @@ ELSE ncbi_tax_id END ncbi_tax_id, to_char(sum(length)/1000000,'9999.99') as megabps - FROM GenomicSeqAttributes + FROM :SCHEMA.GenomicSeqAttributes WHERE is_top_level = 1 + AND org_abbrev = ':ORG_ABBREV' GROUP BY project_ID, taxon_id, ncbi_tax_id ) genomestat ON genomestat.taxon_id = ga.taxon_id - LEFT OUTER JOIN ( - SELECT count(distinct ga.source_id) as ct, ga.taxon_id - FROM GeneAttributes ga, SnpAttributes sf - WHERE sf.gene_source_id = ga.source_id - AND ga.is_deprecated = 0 - GROUP BY ga.taxon_id - ) snpCount ON ga.taxon_id = snpCount.taxon_id + -- LEFT OUTER JOIN ( + -- SELECT count(distinct ga.source_id) as ct, ga.taxon_id + -- FROM GeneAttributes ga, SnpAttributes sf + -- WHERE sf.gene_source_id = ga.source_id + -- AND ga.is_deprecated = 0 + -- GROUP BY ga.taxon_id + -- ) snpCount ON ga.taxon_id = snpCount.taxon_id GROUP BY genomestat.taxon_id, genomestat.project_id, genomestat.database_version, genomestat.ncbi_tax_id, - genomestat.Megabps, - snpCount.ct + genomestat.Megabps + -- snpCount.ct ; :CREATE_AND_POPULATE - - - CREATE TABLE OrganismAttributes AS SELECT oa.*, tn2.name as species, t.ncbi_tax_id as species_ncbi_tax_id , CASE WHEN ltrim(replace(oa.organism_name, tn2.name, ''))= oa.organism_name THEN strain_abbrev @@ -310,7 +310,8 @@ LEFT JOIN ( SELECT taxon_id, round(avg(length),1) as avg_transcript_length FROM TranscriptAttributes - GROUP by taxon_id + where org_abbrev = ':ORG_ABBREV' + GROUP by taxon_id ) ta ON o.taxon_id = ta.taxon_id WHERE tn.name_class = 'scientific name' ) oa, @@ 
-321,6 +322,7 @@ AND ts.species_taxon_id = t.taxon_id AND ts.species_taxon_id = tn2.taxon_id AND tn2.name_class = 'scientific name' + and o.taxon_id = :TAXON_ID :DECLARE_PARTITION; diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index 4b7c8e9056..cd642166d8 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -99,8 +99,8 @@ - [X] EstAlignmentGeneSummary.psql - Dataset / Other - - [ ] DatasetExampleSourceId_ix.psql - - [ ] DatasetExampleSourceId.psql + - [s] DatasetExampleSourceId_ix.psql + - [s] DatasetExampleSourceId.psql - [ ] Profile_ix.psql - [ ] Profile.psql - [ ] ProfileSamples_ix.psql @@ -109,8 +109,8 @@ - [ ] ProfileType.psql - [ ] RnaSeqStats_ix.psql - [ ] RnaSeqStats.psql - - [ ] OrganismAttributes_ix.psql - - [ ] OrganismAttributes.psql + - [s] OrganismAttributes_ix.psql + - [s] OrganismAttributes.psql - [ ] ChrCopyNumbers_ix.psql - [ ] ChrCopyNumbers.psql From 8b2139611dc942acfb10ee0b00ada7d2e0e0a872 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 13 May 2025 16:09:07 -0400 Subject: [PATCH 012/112] convert some more MOs --- Model/lib/psql/webtables/MO/ChrCopyNumbers.psql | 4 +++- Model/lib/xml/tuningManager/webtables.org | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql index 8b4b6d5ca1..5b81bd75e7 100644 --- a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql +++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql @@ -6,10 +6,12 @@ , io.output_pan_id FROM apidb.ChrCopyNumber ccn , :SCHEMA.TranscriptAttributes ta - , :SCHEMA.PANIo io + , :SCHEMA.PANIo io WHERE ta.na_sequence_id = ccn.na_sequence_id AND ta.chromosome IS NOT NULL AND ccn.protocol_app_node_id = io.output_pan_id + and ta.org_abbrev = ':ORG_ABBREV' + and io.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; diff --git a/Model/lib/xml/tuningManager/webtables.org 
b/Model/lib/xml/tuningManager/webtables.org index cd642166d8..e8923cdb98 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -111,8 +111,8 @@ - [ ] RnaSeqStats.psql - [s] OrganismAttributes_ix.psql - [s] OrganismAttributes.psql - - [ ] ChrCopyNumbers_ix.psql - - [ ] ChrCopyNumbers.psql + - [s] ChrCopyNumbers_ix.psql + - [s] ChrCopyNumbers.psql - Junctions (Kathryn) - [ ] IntronSupportLevel_ix.psql From f7c173d3fbe96ab400150eff5a4a250151aac6fc Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Tue, 13 May 2025 16:11:05 -0400 Subject: [PATCH 013/112] filled in some ? --- Model/lib/xml/tuningManager/tablePruning.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt index dfcfa55e0e..3062f04c2c 100644 --- a/Model/lib/xml/tuningManager/tablePruning.txt +++ b/Model/lib/xml/tuningManager/tablePruning.txt @@ -1,23 +1,23 @@ MG -?? +MO ?? ?? -?? +K K -?? +R ?? K MO R -?? -?? +MO +MO ?? -?? -?? -?? +MO +K +MO (Pretty sure this is MO... but we do have profiles for compounds (not genes). they should be handled different) K -?? -?? +K +MC MO MO MO (could be renamed to LineageForSynteny) From 2f7e259dba5837150dc23b48b9e0eb0086e6cfae Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 13 May 2025 16:14:05 -0400 Subject: [PATCH 014/112] rename ?? to UK --- Model/lib/psql/webtables/{?? => UK}/AlphaFoldGenes.psql | 0 Model/lib/psql/webtables/{?? => UK}/AssociatedDataset.psql | 0 Model/lib/psql/webtables/{?? => UK}/DatasetDetail.psql | 0 Model/lib/psql/webtables/{?? => UK}/DatasetPresenter.psql | 0 Model/lib/psql/webtables/{?? => UK}/DomainAssignment.psql | 0 Model/lib/psql/webtables/{?? => UK}/EdaGeneGraph.psql | 0 Model/lib/psql/webtables/{?? => UK}/EupathBuildDates.psql | 0 .../lib/psql/webtables/{?? => UK}/ExternalDbDatasetPresenter.psql | 0 .../lib/psql/webtables/{?? 
=> UK}/ExternalSequenceTaxonRank.psql | 0 Model/lib/psql/webtables/{?? => UK}/GeneGroupProfile.psql | 0 Model/lib/psql/webtables/{?? => UK}/GroupPhylogeneticProfile.psql | 0 Model/lib/psql/webtables/{?? => UK}/OrthologousTranscripts.psql | 0 Model/lib/psql/webtables/{?? => UK}/PANExtDbRls.psql | 0 Model/lib/psql/webtables/{?? => UK}/PANIO.psql | 0 Model/lib/psql/webtables/{?? => UK}/PANResults.psql | 0 Model/lib/psql/webtables/{?? => UK}/PhyleticPattern.psql | 0 Model/lib/psql/webtables/{?? => UK}/ProjectTaxon.psql | 0 Model/lib/psql/webtables/{?? => UK}/SequenceAttributes.psql | 0 Model/lib/psql/webtables/{?? => UK}/SequenceEnzymeClass.psql | 0 Model/lib/psql/webtables/{?? => UK}/StudyIdDatasetId.psql | 0 Model/lib/psql/webtables/{?? => UK}/TypeAheadCounts.psql | 0 21 files changed, 0 insertions(+), 0 deletions(-) rename Model/lib/psql/webtables/{?? => UK}/AlphaFoldGenes.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/AssociatedDataset.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/DatasetDetail.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/DatasetPresenter.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/DomainAssignment.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/EdaGeneGraph.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/EupathBuildDates.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/ExternalDbDatasetPresenter.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/ExternalSequenceTaxonRank.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/GeneGroupProfile.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/GroupPhylogeneticProfile.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/OrthologousTranscripts.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/PANExtDbRls.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/PANIO.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/PANResults.psql (100%) rename Model/lib/psql/webtables/{?? 
=> UK}/PhyleticPattern.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/ProjectTaxon.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/SequenceAttributes.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/SequenceEnzymeClass.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/StudyIdDatasetId.psql (100%) rename Model/lib/psql/webtables/{?? => UK}/TypeAheadCounts.psql (100%) diff --git a/Model/lib/psql/webtables/??/AlphaFoldGenes.psql b/Model/lib/psql/webtables/UK/AlphaFoldGenes.psql similarity index 100% rename from Model/lib/psql/webtables/??/AlphaFoldGenes.psql rename to Model/lib/psql/webtables/UK/AlphaFoldGenes.psql diff --git a/Model/lib/psql/webtables/??/AssociatedDataset.psql b/Model/lib/psql/webtables/UK/AssociatedDataset.psql similarity index 100% rename from Model/lib/psql/webtables/??/AssociatedDataset.psql rename to Model/lib/psql/webtables/UK/AssociatedDataset.psql diff --git a/Model/lib/psql/webtables/??/DatasetDetail.psql b/Model/lib/psql/webtables/UK/DatasetDetail.psql similarity index 100% rename from Model/lib/psql/webtables/??/DatasetDetail.psql rename to Model/lib/psql/webtables/UK/DatasetDetail.psql diff --git a/Model/lib/psql/webtables/??/DatasetPresenter.psql b/Model/lib/psql/webtables/UK/DatasetPresenter.psql similarity index 100% rename from Model/lib/psql/webtables/??/DatasetPresenter.psql rename to Model/lib/psql/webtables/UK/DatasetPresenter.psql diff --git a/Model/lib/psql/webtables/??/DomainAssignment.psql b/Model/lib/psql/webtables/UK/DomainAssignment.psql similarity index 100% rename from Model/lib/psql/webtables/??/DomainAssignment.psql rename to Model/lib/psql/webtables/UK/DomainAssignment.psql diff --git a/Model/lib/psql/webtables/??/EdaGeneGraph.psql b/Model/lib/psql/webtables/UK/EdaGeneGraph.psql similarity index 100% rename from Model/lib/psql/webtables/??/EdaGeneGraph.psql rename to Model/lib/psql/webtables/UK/EdaGeneGraph.psql diff --git a/Model/lib/psql/webtables/??/EupathBuildDates.psql 
b/Model/lib/psql/webtables/UK/EupathBuildDates.psql similarity index 100% rename from Model/lib/psql/webtables/??/EupathBuildDates.psql rename to Model/lib/psql/webtables/UK/EupathBuildDates.psql diff --git a/Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql b/Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql similarity index 100% rename from Model/lib/psql/webtables/??/ExternalDbDatasetPresenter.psql rename to Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql diff --git a/Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql b/Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql similarity index 100% rename from Model/lib/psql/webtables/??/ExternalSequenceTaxonRank.psql rename to Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql diff --git a/Model/lib/psql/webtables/??/GeneGroupProfile.psql b/Model/lib/psql/webtables/UK/GeneGroupProfile.psql similarity index 100% rename from Model/lib/psql/webtables/??/GeneGroupProfile.psql rename to Model/lib/psql/webtables/UK/GeneGroupProfile.psql diff --git a/Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql b/Model/lib/psql/webtables/UK/GroupPhylogeneticProfile.psql similarity index 100% rename from Model/lib/psql/webtables/??/GroupPhylogeneticProfile.psql rename to Model/lib/psql/webtables/UK/GroupPhylogeneticProfile.psql diff --git a/Model/lib/psql/webtables/??/OrthologousTranscripts.psql b/Model/lib/psql/webtables/UK/OrthologousTranscripts.psql similarity index 100% rename from Model/lib/psql/webtables/??/OrthologousTranscripts.psql rename to Model/lib/psql/webtables/UK/OrthologousTranscripts.psql diff --git a/Model/lib/psql/webtables/??/PANExtDbRls.psql b/Model/lib/psql/webtables/UK/PANExtDbRls.psql similarity index 100% rename from Model/lib/psql/webtables/??/PANExtDbRls.psql rename to Model/lib/psql/webtables/UK/PANExtDbRls.psql diff --git a/Model/lib/psql/webtables/??/PANIO.psql b/Model/lib/psql/webtables/UK/PANIO.psql similarity index 100% rename from 
Model/lib/psql/webtables/??/PANIO.psql rename to Model/lib/psql/webtables/UK/PANIO.psql diff --git a/Model/lib/psql/webtables/??/PANResults.psql b/Model/lib/psql/webtables/UK/PANResults.psql similarity index 100% rename from Model/lib/psql/webtables/??/PANResults.psql rename to Model/lib/psql/webtables/UK/PANResults.psql diff --git a/Model/lib/psql/webtables/??/PhyleticPattern.psql b/Model/lib/psql/webtables/UK/PhyleticPattern.psql similarity index 100% rename from Model/lib/psql/webtables/??/PhyleticPattern.psql rename to Model/lib/psql/webtables/UK/PhyleticPattern.psql diff --git a/Model/lib/psql/webtables/??/ProjectTaxon.psql b/Model/lib/psql/webtables/UK/ProjectTaxon.psql similarity index 100% rename from Model/lib/psql/webtables/??/ProjectTaxon.psql rename to Model/lib/psql/webtables/UK/ProjectTaxon.psql diff --git a/Model/lib/psql/webtables/??/SequenceAttributes.psql b/Model/lib/psql/webtables/UK/SequenceAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/??/SequenceAttributes.psql rename to Model/lib/psql/webtables/UK/SequenceAttributes.psql diff --git a/Model/lib/psql/webtables/??/SequenceEnzymeClass.psql b/Model/lib/psql/webtables/UK/SequenceEnzymeClass.psql similarity index 100% rename from Model/lib/psql/webtables/??/SequenceEnzymeClass.psql rename to Model/lib/psql/webtables/UK/SequenceEnzymeClass.psql diff --git a/Model/lib/psql/webtables/??/StudyIdDatasetId.psql b/Model/lib/psql/webtables/UK/StudyIdDatasetId.psql similarity index 100% rename from Model/lib/psql/webtables/??/StudyIdDatasetId.psql rename to Model/lib/psql/webtables/UK/StudyIdDatasetId.psql diff --git a/Model/lib/psql/webtables/??/TypeAheadCounts.psql b/Model/lib/psql/webtables/UK/TypeAheadCounts.psql similarity index 100% rename from Model/lib/psql/webtables/??/TypeAheadCounts.psql rename to Model/lib/psql/webtables/UK/TypeAheadCounts.psql From ff0a3fce9aef42e053466a1575958767033da475 Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Tue, 13 May 2025 16:17:36 
-0400 Subject: [PATCH 015/112] filled in some ? --- Model/lib/xml/tuningManager/tablePruning.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt index 3062f04c2c..aa94aa6da9 100644 --- a/Model/lib/xml/tuningManager/tablePruning.txt +++ b/Model/lib/xml/tuningManager/tablePruning.txt @@ -116,8 +116,8 @@ MO R MO R -?? (could be put into the orthomcl graph. Rich and John should look) -?? (similar to OrthologousTranscripts) +MC (could be put into the orthomcl graph. Rich and John should look) +MC (similar to OrthologousTranscripts) R R MO (PANIO will need to exist) @@ -128,9 +128,9 @@ K MO K MO (rm auto_lob; don't need to loop over chunks in postgres) -?? +MC K K -?? (can we do project specific alphafold? or, put this in after alphafold, whereever it goes) +MC (alpha fold is cross project) MO K From d666a39ba238dddb7a9e62ea243a1537ad800e0d Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 13 May 2025 16:59:01 -0400 Subject: [PATCH 016/112] more updates --- .../lib/psql/webtables/MO/ChrCopyNumbers.psql | 6 +++- .../psql/webtables/MO/OrganismAttributes.psql | 3 +- .../{UK => MO}/SequenceEnzymeClass.psql | 29 ++++++------------- .../webtables/MO/SequenceEnzymeClass_ix.psql | 7 +++++ Model/lib/xml/tuningManager/webtables.org | 6 +++- 5 files changed, 28 insertions(+), 23 deletions(-) rename Model/lib/psql/webtables/{UK => MO}/SequenceEnzymeClass.psql (58%) create mode 100644 Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql index 5b81bd75e7..eafe7652b8 100644 --- a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql +++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql @@ -1,5 +1,9 @@ :CREATE_AND_POPULATE - SELECT DISTINCT ta.na_sequence_id + SELECT DISTINCT + ta.project_id + , ta.org_abbrev + , current_timestamp as modification_date + , 
ta.na_sequence_id , ta.chromosome , ccn.chr_copy_number AS ploidy , io.input_pan_id diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/MO/OrganismAttributes.psql index 9e49fb17a9..1f68de2f75 100644 --- a/Model/lib/psql/webtables/MO/OrganismAttributes.psql +++ b/Model/lib/psql/webtables/MO/OrganismAttributes.psql @@ -242,7 +242,8 @@ ; :CREATE_AND_POPULATE - SELECT oa.*, tn2.name as species, t.ncbi_tax_id as species_ncbi_tax_id + SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, + oa.*, tn2.name as species, t.ncbi_tax_id as species_ncbi_tax_id , CASE WHEN ltrim(replace(oa.organism_name, tn2.name, ''))= oa.organism_name THEN strain_abbrev ELSE ltrim(replace(oa.organism_name, tn2.name, '')) END AS strain diff --git a/Model/lib/psql/webtables/UK/SequenceEnzymeClass.psql b/Model/lib/psql/webtables/MO/SequenceEnzymeClass.psql similarity index 58% rename from Model/lib/psql/webtables/UK/SequenceEnzymeClass.psql rename to Model/lib/psql/webtables/MO/SequenceEnzymeClass.psql index 35fdff6793..f948ba46c7 100644 --- a/Model/lib/psql/webtables/UK/SequenceEnzymeClass.psql +++ b/Model/lib/psql/webtables/MO/SequenceEnzymeClass.psql @@ -1,7 +1,8 @@ - - - CREATE TABLE SequenceEnzymeClass AS ( - SELECT sa.full_id +:CREATE_AND_POPULATE + SELECT sa.project_id + , sa.org_abbrev + , current_timestamp as modification_date + , sa.full_id , sa.group_name -- , sec.uniprot_accession , ec.ec_number @@ -11,26 +12,14 @@ , ec.ec_number_2 , ec.ec_number_3 , ec.ec_number_4 - FROM sequenceattributes sa + FROM :SCHEMA.sequenceattributes sa , dots.AASequence aa , dots.AASequenceEnzymeClass sec , sres.ENZYMECLASS ec WHERE sa.aa_sequence_id = aa.aa_sequence_id AND sec.aa_sequence_id = aa.aa_sequence_id AND sec.enzyme_class_id = ec.enzyme_class_id - ) - - ; - - - - CREATE INDEX SequenceEnzymeClass_idx1 ON SequenceEnzymeClass (group_name, ec_number, description) - - ; - - - - CREATE INDEX 
SequenceEnzymeClass_idx2 ON SequenceEnzymeClass (full_id, ec_number, description) - - ; + AND sa.org_abbrev = ':ORG_ABBREV' +:DECLARE_PARTITION +; diff --git a/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql b/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql new file mode 100644 index 0000000000..9a55de605a --- /dev/null +++ b/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql @@ -0,0 +1,7 @@ + + CREATE INDEX :SCHEMA.SequenceEnzymeClass_idx1 ON :SCHEMA.SequenceEnzymeClass (group_name, ec_number, description) + ; + + CREATE INDEX :SCHEMA.SequenceEnzymeClass_idx2 ON :SCHEMA.SequenceEnzymeClass (full_id, ec_number, description) + ; + diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index e8923cdb98..2ffa4f7f3f 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -22,7 +22,8 @@ - [X] GenomicSequenceSequence.psql - [X] SequencePieceClosure - [X] GenomicSeqAttributes - + - [s] SequenceEnzymeClass + - Transcript / Protein - [X] SignalPeptideDomains_ix.psql - [X] SignalPeptideDomains.psql @@ -101,6 +102,8 @@ - Dataset / Other - [s] DatasetExampleSourceId_ix.psql - [s] DatasetExampleSourceId.psql + - [ ] PANIO.psql + - [ ] PANIO_ix.psql - [ ] Profile_ix.psql - [ ] Profile.psql - [ ] ProfileSamples_ix.psql @@ -111,6 +114,7 @@ - [ ] RnaSeqStats.psql - [s] OrganismAttributes_ix.psql - [s] OrganismAttributes.psql + - removed ESTs and SNPs - [s] ChrCopyNumbers_ix.psql - [s] ChrCopyNumbers.psql From 536410be5772bfa030d7412808c19ccc385561d2 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Wed, 14 May 2025 11:43:14 -0400 Subject: [PATCH 017/112] touch up transattrs --- Model/lib/psql/webtables/MO/TranscriptAttributes.psql | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql index 069c605c63..e82dea4d5b 100644 --- 
a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql +++ b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql @@ -431,25 +431,24 @@ ; - -- TODO: THIS IS BROKEN. UPDATE :SCHEMA.TranscriptAttributes SET representative_transcript = ( select min(source_id) from :SCHEMA.TranscriptAttributes ga where ga.gene_source_id = :SCHEMA.TranscriptAttributes.gene_source_id + and org_abbrev = ':ORG_ABBREV' ) WHERE representative_transcript is null AND gene_id is not null - + and org_abbrev = ':ORG_ABBREV' ; - -- TODO: THIS IS BROKEN. UPDATE :ORG_ABBREVTranscriptAttributes SET representative_transcript = source_id WHERE representative_transcript is null - + and org_abbrev = ':ORG_ABBREV' ; - -- TODO: THIS IS BROKEN. + drop table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp ; From d13012bd9e1ed9194618249674bbc3a55cda2675 Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Wed, 14 May 2025 16:36:51 -0400 Subject: [PATCH 018/112] wip --- Model/lib/psql/webtables/UK/PANExtDbRls.psql | 18 ++++-- Model/lib/psql/webtables/UK/PANIO.psql | 59 ++++---------------- 2 files changed, 24 insertions(+), 53 deletions(-) diff --git a/Model/lib/psql/webtables/UK/PANExtDbRls.psql b/Model/lib/psql/webtables/UK/PANExtDbRls.psql index 82b469f127..524ef017c5 100644 --- a/Model/lib/psql/webtables/UK/PANExtDbRls.psql +++ b/Model/lib/psql/webtables/UK/PANExtDbRls.psql @@ -1,7 +1,7 @@ +:CREATE_AND_POPULATE - - CREATE TABLE :ORG_ABBREVPANExtDbRls AS - SELECT distinct protocol_app_node_id as pan_id, external_database_release_id, name as dataset_name + SELECT distinct protocol_app_node_id as pan_id, external_database_release_id, name as dataset_name, + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM ( SELECT sl.protocol_app_node_id @@ -12,11 +12,15 @@ , study.nodeNodeSet sl , sres.externaldatabaserelease r , sres.externaldatabase d + , apidb.datasource ds WHERE s.external_database_release_id = r.external_database_release_id and r.external_database_id = 
d.external_database_id and s.node_set_id = sl.node_set_id and s.external_database_release_id is not null + and d.name = ds.external_database_name + and r.version = ds.version + and ds.taxon_id = :TAXON_ID UNION SELECT pan.protocol_app_node_id , pan.external_database_release_id @@ -24,13 +28,15 @@ FROM study.protocolappnode pan , sres.externaldatabaserelease r , sres.externaldatabase d + , apidb.datasource ds WHERE pan.external_database_release_id = r.external_database_release_id and r.external_database_id = d.external_database_id and pan.external_database_release_id is not null + and d.name = ds.external_database_name + and r.version = ds.version + and ds.taxon_id = :TAXON_ID ) t - WHERE (name = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0) ORDER BY external_database_release_id, protocol_app_node_id - ; - +:DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/UK/PANIO.psql b/Model/lib/psql/webtables/UK/PANIO.psql index fa820f9d4b..c7afcbeca5 100644 --- a/Model/lib/psql/webtables/UK/PANIO.psql +++ b/Model/lib/psql/webtables/UK/PANIO.psql @@ -1,6 +1,4 @@ - - - CREATE TABLE :ORG_ABBREVPANIO AS +:CREATE_AND_POPULATE SELECT DISTINCT io.* FROM ( SELECT i.protocol_app_node_id input_pan_id, pa.protocol_app_id, @@ -11,49 +9,16 @@ out_type.source_id as output_pan_type_source_id, --out_type.name as output_pan_type, out_type.ontology_term_id as output_pan_type_id - FROM study.ProtocolApp pa, study.Input i, study.Output o, - study.ProtocolAppNode in_pan LEFT JOIN sres.OntologyTerm in_type ON in_pan.type_id = in_type.ontology_term_id, - study.ProtocolAppNode out_pan LEFT JOIN sres.OntologyTerm out_type ON out_pan.type_id = out_type.ontology_term_id - WHERE i.protocol_app_id = pa.protocol_app_id - AND o.protocol_app_id = pa.protocol_app_id - AND i.protocol_app_node_id = in_pan.protocol_app_node_id - AND o.protocol_app_node_id = out_pan.protocol_app_node_id - ) io, :ORG_ABBREVpanextdbrls panExtDbRls - WHERE io.input_pan_id = panExtDbRls.pan_id -- the input and outputs will 
have same dataset in prefix enabled mode only - AND (panExtDbRls.dataset_name = ':TAXON_IDValue' or length(':TAXON_IDValue') = 0) + FROM :SCHEMA.panextdbrls panExtDbRls, + INNER JOIN study.Input i on i.protocol_app_node_id = panExtDbRls.pan_id + INNER JOIN study.ProtocolApp pa on i.protocol_app_id = pa.protocol_app_id + INNER JOIN study.Output o AND o.protocol_app_id = pa.protocol_app_id + INNER JOIN study.ProtocolAppNode in_pan on i.protocol_app_node_id = in_pan.protocol_app_node_id + INNER JOIN study.ProtocolAppNode out_pan on o.protocol_app_node_id = out_pan.protocol_app_node_id + LEFT JOIN sres.OntologyTerm out_type ON out_pan.type_id = out_type.ontology_term_id + LEFT JOIN sres.OntologyTerm in_type ON in_pan.type_id = in_type.ontology_term_id + WHERE panExtDbRls.org_abbrev = ':ORG_ABBREV' + ) io ORDER BY io.input_pan_id, io.output_pan_id - ; - - - - create index :ORG_ABBREVpainio2_iix on :ORG_ABBREVPANIO - (input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id) - - - ; - - - - create index :ORG_ABBREVpainio2_oix on :ORG_ABBREVPANIO - (output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id) - - - ; - - - - create index :ORG_ABBREVpainio2_otypeix on :ORG_ABBREVPANIO - (output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id) - - - ; - - - - create index :ORG_ABBREVpainio2_itypeix on :ORG_ABBREVPANIO - (input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id) - - - ; - +:DECLARE_PARTITION; From 099f1a08616fb7df8c9df4ca9c3f5393e579eea2 Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Wed, 14 May 2025 17:00:52 -0400 Subject: [PATCH 019/112] move or remove some psql files --- .../webtables/{UK => MO}/PANExtDbRls.psql | 0 .../lib/psql/webtables/{UK => MO}/PANIO.psql | 0 .../lib/psql/webtables/UK/DatasetDetail.psql | 58 ------------------ .../psql/webtables/UK/DatasetPresenter.psql | 0 
.../psql/webtables/UK/EupathBuildDates.psql | 0 .../UK/ExternalDbDatasetPresenter.psql | 46 -------------- .../UK/ExternalSequenceTaxonRank.psql | 60 ------------------- .../psql/webtables/UK/GeneGroupProfile.psql | 30 ---------- .../psql/webtables/UK/StudyIdDatasetId.psql | 24 -------- 9 files changed, 218 deletions(-) rename Model/lib/psql/webtables/{UK => MO}/PANExtDbRls.psql (100%) rename Model/lib/psql/webtables/{UK => MO}/PANIO.psql (100%) delete mode 100644 Model/lib/psql/webtables/UK/DatasetDetail.psql delete mode 100644 Model/lib/psql/webtables/UK/DatasetPresenter.psql delete mode 100644 Model/lib/psql/webtables/UK/EupathBuildDates.psql delete mode 100644 Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql delete mode 100644 Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql delete mode 100644 Model/lib/psql/webtables/UK/GeneGroupProfile.psql delete mode 100644 Model/lib/psql/webtables/UK/StudyIdDatasetId.psql diff --git a/Model/lib/psql/webtables/UK/PANExtDbRls.psql b/Model/lib/psql/webtables/MO/PANExtDbRls.psql similarity index 100% rename from Model/lib/psql/webtables/UK/PANExtDbRls.psql rename to Model/lib/psql/webtables/MO/PANExtDbRls.psql diff --git a/Model/lib/psql/webtables/UK/PANIO.psql b/Model/lib/psql/webtables/MO/PANIO.psql similarity index 100% rename from Model/lib/psql/webtables/UK/PANIO.psql rename to Model/lib/psql/webtables/MO/PANIO.psql diff --git a/Model/lib/psql/webtables/UK/DatasetDetail.psql b/Model/lib/psql/webtables/UK/DatasetDetail.psql deleted file mode 100644 index 1ec5eed7be..0000000000 --- a/Model/lib/psql/webtables/UK/DatasetDetail.psql +++ /dev/null @@ -1,58 +0,0 @@ - - - CREATE TABLE DatasetDetail AS - SELECT dataset_presenter_id, - name || ' ' || category || ' ' || usage || ' ' || - caveat || ' ' || acknowledgement || ' ' || type || ' ' || subtype - ||' ' || summary || ' ' || description || ' ' || contact || ' ' || - institution || ' ' || pubmed_id || ' ' || citation as search_string - FROM ( - SELECT - 
sub.dataset_presenter_id as dataset_presenter_id, - sub.name as name, - sub.category as category, - sub.usage as usage, - sub.caveat as caveat, - sub.acknowledgement as acknowledgement, - sub.type as type, - sub.subtype as subtype, - sub.contact, - sub.institution, - sub.pubmed_id, - sub.citation, - dp.summary, - dp.description - FROM DatasetPresenter dp, - ( - SELECT DISTINCT - dp.dataset_presenter_id as dataset_presenter_id, - dp.display_name as name, - dp.display_category as category, - dp.usage as usage, - dp.caveat as caveat, - dp.acknowledgement as acknowledgement, - dp.type as type, - dp.subtype as subtype, - dc.name as contact, - dc.affiliation as institution, - string_agg(dpub.pmid, ' ' ORDER BY dpub.pmid) as pubmed_id, - -- CHECK AND FIX - regexp_like ISSUE - --string_agg(CASE WHEN REGEXP_LIKE(dpub.citation, '[[:digit:]]{4};') - -- THEN substr(citation, 1, regexp_instr(citation, '[[:digit:]]{4};' ) - 1) - -- ELSE dpub.citation - -- END , ' ' ORDER BY dpub.citation) as citation - string_agg(dpub.citation, ' ' ORDER BY dpub.citation) as citation - FROM DatasetPresenter dp, DatasetContact dc, - DatasetPublication dpub - WHERE dp.dataset_presenter_id = dc.dataset_presenter_id - AND dp.dataset_presenter_id = dpub.dataset_presenter_id - AND dc.is_primary_contact = true - GROUP by dp.dataset_presenter_id, dp.display_name,dp.display_category, - dp.usage,dp.caveat,dp.acknowledgement,dp.type,dp.subtype,dc.name, - dc.affiliation - ) sub - WHERE dp.dataset_presenter_id = sub.dataset_presenter_id - ) t - - ; - diff --git a/Model/lib/psql/webtables/UK/DatasetPresenter.psql b/Model/lib/psql/webtables/UK/DatasetPresenter.psql deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/Model/lib/psql/webtables/UK/EupathBuildDates.psql b/Model/lib/psql/webtables/UK/EupathBuildDates.psql deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql 
b/Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql deleted file mode 100644 index 0b8856e4e5..0000000000 --- a/Model/lib/psql/webtables/UK/ExternalDbDatasetPresenter.psql +++ /dev/null @@ -1,46 +0,0 @@ - - - CREATE TABLE ExternalDbDatasetPresenter AS - SELECT ed.external_database_id, ed.name AS external_database_name, - edr.external_database_release_id, SUBSTR(edr.version, 1, 40) AS external_database_version, - dsp.dataset_presenter_id, dsp.name AS dataset_presenter_name, - dsp.display_name AS dataset_presenter_display_name - FROM sres.externalDatabaseRelease edr, sres.externalDatabase ed, DatasetPresenter dsp - WHERE ed.external_database_id = edr.external_database_id - AND (ed.name = dsp.name - OR ed.name LIKE dsp.dataset_name_pattern) - ORDER BY ed.name - - ; - - - - create index edd_rlsidix - on ExternalDbDatasetPresenter - (external_database_release_id, external_database_id, external_database_name, - dataset_presenter_id, dataset_presenter_name, dataset_presenter_display_name) - - - ; - - - - create index edd_dsidix - on ExternalDbDatasetPresenter - (dataset_presenter_id, external_database_id, external_database_release_id, - external_database_name, dataset_presenter_name, dataset_presenter_display_name) - - - ; - - - - create index edd_dsnameix - on ExternalDbDatasetPresenter - (dataset_presenter_name, dataset_presenter_id, external_database_id, - external_database_release_id, external_database_name, external_database_version, - dataset_presenter_display_name) - - - ; - diff --git a/Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql b/Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql deleted file mode 100644 index 1dd9673b23..0000000000 --- a/Model/lib/psql/webtables/UK/ExternalSequenceTaxonRank.psql +++ /dev/null @@ -1,60 +0,0 @@ - - - CREATE TABLE ExternalSequenceTaxonRank AS - WITH organism_rank AS ( - SELECT tn1.taxon_id as organism, tn2.name as parent_organism, - tn2.taxon_id as parent_organism_id, r.rank - FROM sres.TaxonName 
tn1, sres.TaxonName tn2, - ( - WITH RECURSIVE cte AS ( - SELECT taxon_id as input, taxon_id, rank, parent_id - FROM sres.taxon - WHERE taxon_id IN ( - SELECT taxon_id FROM dots.externalaasequence - UNION - SELECT taxon_id FROM apidb.taxonstring - ) - UNION - SELECT cte.input, t.taxon_id, t.rank, t.parent_id - FROM sres.taxon t, cte - WHERE cte.parent_id = t.taxon_id - ) - SELECT input, taxon_id, rank - FROM cte - ) r - WHERE r.input = tn1.taxon_id - and r.taxon_id = tn2.taxon_id - and tn1.name_class = 'scientific name' - and tn2.name_class = 'scientific name' - and r.rank in ('phylum', 'family','genus', 'species', 'superkingdom','kingdom', 'class', 'order') - ) - SELECT organisms.organism, - coalesce(superkingdom.parent_organism, 'N/A') as superkingdom, - superkingdom.parent_organism_id as superkingdom_id, - coalesce(kingdom.parent_organism, 'N/A') as kingdom, - kingdom.parent_organism_id as kingdom_id, - coalesce (phylum.parent_organism, 'N/A') as phylum, - phylum.parent_organism_id as phylum_id, - coalesce (class.parent_organism, 'N/A') as class, - class.parent_organism_id as class_id, - coalesce (family.parent_organism, 'N/A') as family, - family.parent_organism_id as family_id, - coalesce (rank_order.parent_organism, 'N/A') as rank_order, - rank_order.parent_organism_id as rank_order_id, - coalesce ( genus.parent_organism, 'N/A') as genus, - genus.parent_organism_id as genus_id, - coalesce(species.parent_organism, 'N/A') as species, - species.parent_organism_id as species_id - FROM (SELECT DISTINCT organism FROM organism_rank) organisms - LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'phylum') phylum ON organisms.organism = phylum.organism - LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'genus') genus ON organisms.organism = genus.organism - LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'species') species ON organisms.organism = species.organism - LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'kingdom') kingdom ON organisms.organism = 
kingdom.organism - LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'superkingdom') superkingdom ON organisms.organism = superkingdom.organism - LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'class') class ON organisms.organism = class.organism - LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'family') family ON organisms.organism = family.organism - LEFT JOIN (SELECT * FROM organism_rank WHERE rank= 'order') rank_order ON organisms.organism = rank_order.organism - ORDER BY organism, species, genus - - ; - diff --git a/Model/lib/psql/webtables/UK/GeneGroupProfile.psql b/Model/lib/psql/webtables/UK/GeneGroupProfile.psql deleted file mode 100644 index e91fd35148..0000000000 --- a/Model/lib/psql/webtables/UK/GeneGroupProfile.psql +++ /dev/null @@ -1,30 +0,0 @@ - - - create table GeneGroupProfile as - select distinct other_gene.source_id, p.dataset_name, - this_gene.source_id as profile_graph_id - from OrthologousTranscripts ot - , Profile p - , GeneAttributes this_gene - , GeneAttributes other_gene - where p.source_id = ot.source_id - and ot.source_id = this_gene.source_id - and ot.ortho_gene_source_id = other_gene.source_id - and this_gene.species = other_gene.species - and ot.is_syntenic = 1 - union - select ga.source_id, p.dataset_name, p.source_id as profile_graph_id - from Profile p, GeneAttributes ga - where p.source_id = ga.source_id - - ; - - - - create index ggp_ix - on GeneGroupProfile - (source_id, dataset_name, profile_graph_id) - - - ; - diff --git a/Model/lib/psql/webtables/UK/StudyIdDatasetId.psql b/Model/lib/psql/webtables/UK/StudyIdDatasetId.psql deleted file mode 100644 index c42d4df261..0000000000 --- a/Model/lib/psql/webtables/UK/StudyIdDatasetId.psql +++ /dev/null @@ -1,24 +0,0 @@ - - - CREATE TABLE StudyIdDatasetId AS - SELECT s.STABLE_ID STUDY_STABLE_ID, dp.DATASET_PRESENTER_ID DATASET_ID, dp.SHORT_DISPLAY_NAME AS DATASET_SHORT_DISPLAY_NAME - FROM EDA.STUDY s - LEFT JOIN sres.EXTERNALDATABASERELEASE e ON s.EXTERNAL_DATABASE_RELEASE_ID 
=e.EXTERNAL_DATABASE_RELEASE_ID - LEFT JOIN sres.EXTERNALDATABASE e2 ON e.EXTERNAL_DATABASE_ID =e2.EXTERNAL_DATABASE_ID - LEFT JOIN DatasetPresenter dp on e2.name=dp.name - -- This is TEMPORARY (used for alpha MapVEU Application) - UNION - select case - when d.dataset_presenter_id = 'DS_480c976ef9' then 'VBP_MEGA' - when d.dataset_presenter_id = 'DS_e18287e335' then '2023-maine-ricinus' - when d.dataset_presenter_id = 'DS_2b98dd44ab' then '2010-Neafsey-M-S-Bamako' - else 'NA' end as study_stable_id, - d.dataset_presenter_id as dataset_id, d.short_display_name as dataset_short_display_name - from DatasetPresenter d - where d.dataset_presenter_id in ( - 'DS_480c976ef9', - 'DS_e18287e335' - ) - - ; - From ec5bf01e1e3f8ab8ae87b5d19193d485dfb88836 Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Wed, 14 May 2025 17:01:05 -0400 Subject: [PATCH 020/112] wip --- .../webtables/MO/DatasetExampleSourceId.psql | 6 +- .../psql/webtables/MO/OrganismAttributes.psql | 91 ++++++++++--------- Model/lib/psql/webtables/MO/PANIO_ix.psql | 29 ++++++ .../psql/webtables/MO/ProteinAttributes.psql | 34 +++---- Model/lib/psql/webtables/MO/RnaSeqStats.psql | 13 +-- Model/lib/xml/tuningManager/tablePruning.txt | 16 ++-- Model/lib/xml/tuningManager/webtables.org | 36 ++++---- 7 files changed, 131 insertions(+), 94 deletions(-) create mode 100644 Model/lib/psql/webtables/MO/PANIO_ix.psql diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql index 3d10c6319c..4e01371739 100644 --- a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql +++ b/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql @@ -5,18 +5,18 @@ d.name, row_number() over(partition by d.name order by ga.chromosome_order_num, p.profile_as_string desc) as rn - FROM Profile p + FROM :SCHEMA.Profile p INNER JOIN sres.ExternalDatabase d ON p.dataset_name = d.name LEFT JOIN :SCHEMA.GeneAttributes ga ON p.source_id = ga.source_id WHERE p.profile_as_string is 
not null - and ga.org_abbrev = ':ORG_ABBREV' + and ga.org_abbrev = ':ORG_ABBREV' + and p.org_abbrev = ':ORG_ABBREV' ) SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, p.source_id as example_source_id, p.sequence_id, p.name as dataset FROM profiles p WHERE p.rn = 1 - :DECLARE_PARTITION; diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/MO/OrganismAttributes.psql index 1f68de2f75..bda5e8a2b3 100644 --- a/Model/lib/psql/webtables/MO/OrganismAttributes.psql +++ b/Model/lib/psql/webtables/MO/OrganismAttributes.psql @@ -114,45 +114,47 @@ then p.source_id else '' end)) as geneArrayCount - FROM Profile p - RIGHT OUTER JOIN :SCHEMA.GeneAttributes ga ON ga.source_id = p.source_id and ga.org_abbrev = ':ORG_ABBREV' + FROM :SCHEMA.Profile p + RIGHT OUTER JOIN :SCHEMA.GeneAttributes ga ON ga.source_id = p.source_id + WHERE ga.org_abbrev = ':ORG_ABBREV' + and p.org_abbrev = ':ORG_ABBREV' GROUP BY ga.taxon_id ; - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVPopsetCount AS - SELECT count(distinct gene.source_id) as popsetCount, sim.taxon_id - FROM ( - (SELECT i.source_id, nas.taxon_id, nas.source_id as sequence_source_id - FROM dots.similarity s, PopsetAttributes i, - core.tableinfo t, dots.nasequence nas - WHERE s.query_id = i.na_sequence_id - AND nas.na_sequence_id = s.subject_id - AND t.table_id = s.subject_table_id - AND t.table_id = s.query_table_id - AND t.name = 'ExternalNASequence' - AND s.pvalue_exp <= -10 - and nas.taxon_id = :TAXON_ID - ) sim LEFT JOIN - (SELECT i.source_id, seq.source_id as sequence_id - FROM dots.similarity s, PopsetAttributes i, GeneAttributes g, - core.tableinfo t, dots.nasequence seq - WHERE s.query_id = i.na_sequence_id - AND s.subject_id = g.na_sequence_id - AND t.table_id = s.subject_table_id - AND t.table_id = s.query_table_id - AND s.min_subject_start <= g.end_max - AND s.max_subject_end >= g.start_min - AND g.na_sequence_id = seq.na_sequence_id - AND 
t.name = 'ExternalNASequence' - and seq.taxon_id = :TAXON_ID - ) gene - ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id) - GROUP BY sim.taxon_id - - ; + -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVPopsetCount AS + -- SELECT count(distinct gene.source_id) as popsetCount, sim.taxon_id + -- FROM ( + -- (SELECT i.source_id, nas.taxon_id, nas.source_id as sequence_source_id + -- FROM dots.similarity s, PopsetAttributes i, + -- core.tableinfo t, dots.nasequence nas + -- WHERE s.query_id = i.na_sequence_id + -- AND nas.na_sequence_id = s.subject_id + -- AND t.table_id = s.subject_table_id + -- AND t.table_id = s.query_table_id + -- AND t.name = 'ExternalNASequence' + -- AND s.pvalue_exp <= -10 + -- and nas.taxon_id = :TAXON_ID + -- ) sim LEFT JOIN + -- (SELECT i.source_id, seq.source_id as sequence_id + -- FROM dots.similarity s, PopsetAttributes i, GeneAttributes g, + -- core.tableinfo t, dots.nasequence seq + -- WHERE s.query_id = i.na_sequence_id + -- AND s.subject_id = g.na_sequence_id + -- AND t.table_id = s.subject_table_id + -- AND t.table_id = s.query_table_id + -- AND s.min_subject_start <= g.end_max + -- AND s.max_subject_end >= g.start_min + -- AND g.na_sequence_id = seq.na_sequence_id + -- AND t.name = 'ExternalNASequence' + -- and seq.taxon_id = :TAXON_ID + -- ) gene + -- ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id) + -- GROUP BY sim.taxon_id + + -- ; @@ -195,8 +197,8 @@ then ga.source_id else NULL end)),0) ecNumberCount - FROM GeneAttributes ga - LEFT OUTER JOIN apidb.phylogeneticprofile pp on ga.source_id = pp.source_id + FROM :SCHEMA.GeneAttributes ga + LEFT OUTER JOIN apidb.phylogeneticprofile pp on ga.source_id = pp.source_id and ga.org_abbrev = ':ORG_ABBREV' LEFT OUTER JOIN :SCHEMA.gotermsummary gts on ga.source_id = gts.gene_source_id and gts.org_abbrev = ':ORG_ABBREV' LEFT OUTER JOIN :SCHEMA.TFBSGene tfbs on ga.source_id = tfbs.gene_source_id and tfbs.org_abbrev = ':ORG_ABBREV' LEFT 
OUTER JOIN :SCHEMA.TranscriptAttributes ta on ta.gene_source_id = ga.source_id and ta.org_abbrev = ':ORG_ABBREV' @@ -293,37 +295,38 @@ coalesce(sc.supercont_num, 0) as supercontigCount, coalesce(sc.chrom_num, 0) as chromosomeCount, coalesce(cc.communityCount, 0) as communityCount, - coalesce(psc.popsetCount, 0) as popsetCount, + --coalesce(psc.popsetCount, 0) as popsetCount, coalesce(pc.geneArrayCount, 0) as arrayGeneCount, coalesce(pc.rnaSeqCount, 0) as rnaSeqCount, coalesce(pc.rtPCRCount, 0) as rtPCRCount, coalesce(ta.avg_transcript_length, 0) as avg_transcript_length FROM apidb.Organism o - INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id + INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id and o.taxon_id = :TAXON_ID INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id LEFT JOIN :SCHEMA.:ORG_ABBREVDataSourceCount dsc ON o.taxon_id = dsc.taxon_id LEFT JOIN :SCHEMA.:ORG_ABBREVOrganismCentromere oc ON o.taxon_id = oc.taxon_id LEFT JOIN :SCHEMA.:ORG_ABBREVSequenceCount sc ON o.taxon_id = sc.taxon_id LEFT JOIN :SCHEMA.:ORG_ABBREVCommunityCount cc ON o.taxon_id = cc.taxon_id LEFT JOIN :SCHEMA.:ORG_ABBREVGeneCount gc ON o.taxon_id = gc.taxon_id - LEFT JOIN :SCHEMA.:ORG_ABBREVpopsetCount psc ON o.taxon_id = psc.taxon_id + --LEFT JOIN :SCHEMA.:ORG_ABBREVpopsetCount psc ON o.taxon_id = psc.taxon_id LEFT JOIN :SCHEMA.:ORG_ABBREVprofileCount pc ON o.taxon_id = pc.taxon_id LEFT JOIN ( SELECT taxon_id, round(avg(length),1) as avg_transcript_length - FROM TranscriptAttributes - where org_abbrev = ':ORG_ABBREV' - GROUP by taxon_id + FROM :SCHEMA.TranscriptAttributes + where org_abbrev = ':ORG_ABBREV' + GROUP by taxon_id ) ta ON o.taxon_id = ta.taxon_id WHERE tn.name_class = 'scientific name' ) oa, - TaxonSpecies ts, + :SCHEMA.TaxonSpecies ts, sres.taxon t, sres.taxonname tn2 WHERE oa.component_taxon_id = ts.taxon_id AND ts.species_taxon_id = t.taxon_id AND ts.species_taxon_id = tn2.taxon_id AND tn2.name_class = 'scientific name' - and o.taxon_id = :TAXON_ID + AND 
o.taxon_id = :TAXON_ID + AND ts.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; @@ -331,7 +334,7 @@ drop table :SCHEMA.:ORG_ABBREVDataSourceCount; drop table :SCHEMA.:ORG_ABBREVOrganismCentromere; drop table :SCHEMA.:ORG_ABBREVProfileCount; -drop table :SCHEMA.:ORG_ABBREVPopsetCount; +--drop table :SCHEMA.:ORG_ABBREVPopsetCount; drop table :SCHEMA.:ORG_ABBREVGeneCount; drop table :SCHEMA.:ORG_ABBREVSequenceCount; drop table :SCHEMA.:ORG_ABBREVCommunityCount; diff --git a/Model/lib/psql/webtables/MO/PANIO_ix.psql b/Model/lib/psql/webtables/MO/PANIO_ix.psql new file mode 100644 index 0000000000..90176e14d7 --- /dev/null +++ b/Model/lib/psql/webtables/MO/PANIO_ix.psql @@ -0,0 +1,29 @@ + create index :SCHEMA.painio2_iix on :SCHEMA.PANIO + (input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id) + + + ; + + + + create index :SCHEMA.painio2_oix on :SCHEMA.PANIO + (output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id) + + + ; + + + + create index :SCHEMA.painio2_otypeix on :SCHEMA.PANIO + (output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id) + + + ; + + + + create index :SCHEMA.painio2_itypeix on :SCHEMA.PANIO + (input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id) + + + ; diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webtables/MO/ProteinAttributes.psql index ad6c7cd95b..645a1edfae 100644 --- a/Model/lib/psql/webtables/MO/ProteinAttributes.psql +++ b/Model/lib/psql/webtables/MO/ProteinAttributes.psql @@ -102,20 +102,20 @@ ; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp AS - SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived - FROM (SELECT DISTINCT asec.aa_sequence_id, - ec.ec_number || ' (' || ec.description || ')' AS ec_number - FROM dots.AaSequenceEnzymeClass asec, 
sres.EnzymeClass ec, dots.aasequence seq - WHERE ec.enzyme_class_id = asec.enzyme_class_id - AND seq.aa_sequence_id = asec.aa_sequence_id - AND seq.taxon_id = :TAXON_ID - AND asec.evidence_code = 'OrthoMCLDerived' - ) t - GROUP BY aa_sequence_id - - ; + --TODO: these rows will not exist in org specific land + -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp AS + -- SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived + -- FROM (SELECT DISTINCT asec.aa_sequence_id, + -- ec.ec_number || ' (' || ec.description || ')' AS ec_number + -- FROM dots.AaSequenceEnzymeClass asec, sres.EnzymeClass ec, dots.aasequence seq + -- WHERE ec.enzyme_class_id = asec.enzyme_class_id + -- AND seq.aa_sequence_id = asec.aa_sequence_id + -- AND seq.taxon_id = :TAXON_ID + -- AND asec.evidence_code = 'OrthoMCLDerived' + -- ) t + -- GROUP BY aa_sequence_id + + -- ; -- TODO: Filter the subqueries or break into tmp tables for performance @@ -139,7 +139,7 @@ asa.aromaticity_score, SUBSTR(sigp.peptide_sequence, 1, 200) as signalp_peptide, ec_numbers, - ec_numbers_derived, + --ec_numbers_derived, go.annotated_go_component, go.annotated_go_function, go.annotated_go_process, @@ -186,7 +186,7 @@ GROUP BY tms.aa_sequence_id ) transmembrane ON tas.aa_sequence_id = transmembrane.aa_sequence_id LEFT JOIN :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp ec ON tas.aa_sequence_id = ec.aa_sequence_id - LEFT JOIN :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id + --LEFT JOIN :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp ecDerived ON tas.aa_sequence_id = ecDerived.aa_sequence_id LEFT JOIN ( SELECT af.aa_sequence_id, string_agg(dbref.primary_identifier, ',' order by dbref.primary_identifier) as uniprot_ids @@ -215,4 +215,4 @@ drop table :SCHEMA.:ORG_ABBREVGoTermList_tmp; drop table :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp; drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp; -drop table 
:SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp; +--drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp; diff --git a/Model/lib/psql/webtables/MO/RnaSeqStats.psql b/Model/lib/psql/webtables/MO/RnaSeqStats.psql index 33964a796c..8945fa2c92 100644 --- a/Model/lib/psql/webtables/MO/RnaSeqStats.psql +++ b/Model/lib/psql/webtables/MO/RnaSeqStats.psql @@ -1,8 +1,6 @@ :CREATE_AND_POPULATE - - - create table RnaSeqStats as - select study_id, study_name, dataset_name, taxon_id, round(avg(num_reads::integer),0) as avg_unique_reads + select study_id, study_name, dataset_name, taxon_id, round(avg(num_reads::integer),0) as avg_unique_reads, + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date from (select sl.node_set_id as study_id , s.name || '[' || s.node_type || ']' as study_name , ed.name as dataset_name @@ -23,10 +21,12 @@ and pan.protocol_app_node_id = c.protocol_app_node_id and c.qualifier_id = ot.ontology_term_id and (ot.source_id = 'EUPATH_0000460' or ot.source_id = 'EuPathUserDefined_00507') - ) subquery1 + and ds.taxon_id = :TAXON_ID + ) subquery1 group by study_id, study_name, dataset_name, taxon_id union - select study_id, study_name, dataset_name, taxon_id, round(2*avg(num_reads::integer),0) as avg_unique_reads + select study_id, study_name, dataset_name, taxon_id, round(2*avg(num_reads::integer),0) as avg_unique_reads, + ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date from (select sl.node_set_id as study_id , s.name || '[' || s.node_type || ']' as study_name , ed.name as dataset_name @@ -47,6 +47,7 @@ and pan.protocol_app_node_id = c.protocol_app_node_id and c.qualifier_id = ot.ontology_term_id and (ot.source_id = 'EUPATH_0000468' or ot.source_id = 'EuPathUserDefined_00515' or ot.source_id = 'EUPATH_0000476' or ot.source_id = 'EuPathUserDefined_00523') + and ds.taxon_id = :TAXON_ID ) subquery2 group by study_id, study_name, dataset_name, taxon_id diff --git 
a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt index aa94aa6da9..d7d5687d76 100644 --- a/Model/lib/xml/tuningManager/tablePruning.txt +++ b/Model/lib/xml/tuningManager/tablePruning.txt @@ -7,9 +7,9 @@ K R ?? K -MO +MC Should rename this "ProteinSequenceGroup" R -MO +K Need to confirm with Rich but this should be handled now with the new interpro table (or tt) MO ?? MO @@ -33,9 +33,9 @@ MO MO K MO (need BOTH org specific version and global -- WHY???) -MO +K MO -MO (Comment column needs to be made into a dedicated attribute query) +K (Comment column needs to be made into a dedicated attribute query) MO MO MO @@ -107,9 +107,9 @@ K MO K K -MO (need to look at this) -MO (need to look at this) -MO (need to look at this) +K (need to look at this) +K (need to look at this) +K (need to look at this) K MO MO @@ -117,7 +117,7 @@ R MO R MC (could be put into the orthomcl graph. Rich and John should look) -MC (similar to OrthologousTranscripts) +K (similar to OrthologousTranscripts) R R MO (PANIO will need to exist) diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index 2ffa4f7f3f..fa0ecc2bde 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -100,23 +100,27 @@ - [X] EstAlignmentGeneSummary.psql - Dataset / Other - - [s] DatasetExampleSourceId_ix.psql - - [s] DatasetExampleSourceId.psql - - [ ] PANIO.psql - - [ ] PANIO_ix.psql - - [ ] Profile_ix.psql - - [ ] Profile.psql - - [ ] ProfileSamples_ix.psql - - [ ] ProfileSamples.psql - - [ ] ProfileType_ix.psql - - [ ] ProfileType.psql - - [ ] RnaSeqStats_ix.psql - - [ ] RnaSeqStats.psql - - [s] OrganismAttributes_ix.psql - - [s] OrganismAttributes.psql + - [X] DatasetExampleSourceId_ix.psql + - [X] DatasetExampleSourceId.psql + - NOTE: this depends on Profiles + - [X] PANExtDBRls.psql + - [X] PANIO.psql + - [X] PANIO_ix.psql + + - [ ] +ProfileType_ix.psql+ + - [ ] +ProfileType.psql+ 
+ - [ ] +Profile_ix.psql+ + - [ ] +Profile.psql+ + - [ ] +ProfileSamples_ix.psql+ + - [ ] +ProfileSamples.psql+ + + - [X] RnaSeqStats_ix.psql + - [X] RnaSeqStats.psql + - [X] OrganismAttributes_ix.psql + - [X] OrganismAttributes.psql - removed ESTs and SNPs - - [s] ChrCopyNumbers_ix.psql - - [s] ChrCopyNumbers.psql + - [X] ChrCopyNumbers_ix.psql + - [X] ChrCopyNumbers.psql - Junctions (Kathryn) - [ ] IntronSupportLevel_ix.psql From cd9c52dcc202297143c472708c3ecfd040863a10 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 16 May 2025 15:55:41 -0400 Subject: [PATCH 021/112] debug --- Model/lib/psql/webtables/MO/GeneLocations_ix.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql index 135dfff694..6839eea3dc 100644 --- a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql @@ -1,4 +1,4 @@ - create :SCHEMA.index gloc_ix + create index :SCHEMA.gloc_ix on :SCHEMA.GeneLocations (source_id, locations) ; From 82793752dae638b7029693f7751413b3d37279e5 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 16 May 2025 16:05:20 -0400 Subject: [PATCH 022/112] debug --- Model/lib/psql/webtables/MO/GeneLocations_ix.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql index 6839eea3dc..006a1a1dff 100644 --- a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneLocations_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.gloc_ix + create index gloc_ix on :SCHEMA.GeneLocations (source_id, locations) ; From 70888238a5d3ea2d810497845f66d66d58bdb245 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 16 May 2025 16:29:37 -0400 Subject: [PATCH 023/112] correct indexes --- .../webtables/MG/CompoundAttributes_ix.psql | 2 +- 
.../lib/psql/webtables/MG/CompoundId_ix.psql | 2 +- .../webtables/MG/GroupDomainAttribute_ix.psql | 2 +- .../psql/webtables/MG/OntologyLevels_ix.psql | 2 +- .../webtables/MG/PathwayAttributes_ix.psql | 4 +- .../webtables/MG/PathwayCompounds_ix.psql | 2 +- .../webtables/MG/PathwayReactions_ix.psql | 2 +- .../webtables/MO/ChIPchipTranscript_ix.psql | 2 +- .../psql/webtables/MO/ChrCopyNumbers_ix.psql | 4 +- .../psql/webtables/MO/CodingSequence_ix.psql | 2 +- Model/lib/psql/webtables/MO/EqtlSpan_ix.psql | 2 +- .../MO/EstAlignmentGeneSummary_ix.psql | 4 +- .../psql/webtables/MO/EstAttributes_ix.psql | 2 +- .../lib/psql/webtables/MO/EstSequence_ix.psql | 2 +- .../psql/webtables/MO/GeneAttributes_ix.psql | 22 +++++------ .../psql/webtables/MO/GeneCopyNumbers_ix.psql | 2 +- .../lib/psql/webtables/MO/GeneGoTable_ix.psql | 2 +- .../webtables/MO/GeneIntJuncStats_ix.psql | 2 +- .../webtables/MO/GeneIntronJunction_ix.psql | 6 +-- .../webtables/MO/GeneMaxIntronGIJ_ix.psql | 2 +- .../psql/webtables/MO/GeneModelDump_ix.psql | 2 +- .../webtables/MO/GenomicSeqAttributes_ix.psql | 8 ++-- .../webtables/MO/GenomicSequenceId_ix.psql | 6 +-- .../MO/GenomicSequenceSequence_ix.psql | 2 +- .../psql/webtables/MO/GoTermSummary_ix.psql | 4 +- .../psql/webtables/MO/NameMappingGIJ_ix.psql | 2 +- .../webtables/MO/OrganismAttributes_ix.psql | 2 +- Model/lib/psql/webtables/MO/PANIO_ix.psql | 8 ++-- .../webtables/MO/PathwaysGeneTable_ix.psql | 2 +- .../psql/webtables/MO/ProfileSamples_ix.psql | 4 +- Model/lib/psql/webtables/MO/Profile_ix.psql | 6 +-- .../webtables/MO/ProteinAttributes_ix.psql | 4 +- .../psql/webtables/MO/ProteinSequence_ix.psql | 2 +- .../webtables/MO/SequenceEnzymeClass_ix.psql | 4 +- .../webtables/MO/SignalPeptideDomains_ix.psql | 4 +- Model/lib/psql/webtables/MO/TFBSGene_ix.psql | 4 +- Model/lib/psql/webtables/MO/Taxonomy_ix.psql | 2 +- .../webtables/MO/TranscriptAttributes_ix.psql | 38 +++++++++---------- .../MO/TranscriptCenDistance_ix.psql | 2 +- 
.../webtables/MO/TranscriptPathway_ix.psql | 4 +- .../webtables/MO/TranscriptSequence_ix.psql | 2 +- .../webtables/MO/TransmembraneDomains_ix.psql | 2 +- 42 files changed, 92 insertions(+), 92 deletions(-) diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql b/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql index a16c042e5b..697149da51 100644 --- a/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql +++ b/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql @@ -1,2 +1,2 @@ - CREATE INDEX :SCHEMA.CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id) + CREATE INDEX CompoundAttributes_idx ON CompoundAttributes (source_id) ; diff --git a/Model/lib/psql/webtables/MG/CompoundId_ix.psql b/Model/lib/psql/webtables/MG/CompoundId_ix.psql index 217b020bac..20a155ef65 100644 --- a/Model/lib/psql/webtables/MG/CompoundId_ix.psql +++ b/Model/lib/psql/webtables/MG/CompoundId_ix.psql @@ -1,2 +1,2 @@ - CREATE INDEX :SCHEMA.CompoundId_idx ON :SCHEMA.CompoundId (id, compound) + CREATE INDEX CompoundId_idx ON :SCHEMA.CompoundId (id, compound) ; diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql b/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql index 4112a31ce3..b796bb25e4 100644 --- a/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql +++ b/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql @@ -1,2 +1,2 @@ -CREATE INDEX SCHEMA.GroupDomainAttribute_idx ON SCHEMA.GroupDomainAttribute (group_name) +CREATE INDEX GroupDomainAttribute_idx ON :SCHEMA.GroupDomainAttribute (group_name) ; diff --git a/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql b/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql index 708dc47e5a..5f95d3889a 100644 --- a/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql +++ b/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql @@ -1,2 +1,2 @@ - create index :SCHEMA.olev_termix on :SCHEMA.OntologyLevels (ontology_term_id, min_depth, max_depth) + create index olev_termix on :SCHEMA.OntologyLevels 
(ontology_term_id, min_depth, max_depth) ; diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql b/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql index 99f50c5a08..a5c4902674 100644 --- a/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql +++ b/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql @@ -1,7 +1,7 @@ - CREATE UNIQUE INDEX :SCHEMA.PathAttr_sourceId_pwaySrc + CREATE UNIQUE PathAttr_sourceId_pwaySrc ON :SCHEMA.PathwayAttributes (source_id, pathway_source) ; - create index :SCHEMA.PathAttr_ix + create index PathAttr_ix on :SCHEMA.PathwayAttributes (pathway_id, source_id, name, pathway_source, total_enzyme_count, total_compound_count) ; diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql b/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql index fbcdfa72e1..3aae4dbb58 100644 --- a/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql +++ b/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.PthCmpd_id_ix + create index PthCmpd_id_ix on :SCHEMA.PathwayCompounds (pathway_id, reaction_id, ext_db_name) ; diff --git a/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql b/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql index e0ed73978b..50dd09d63b 100644 --- a/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql +++ b/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.PathRcts_id_ix + create index PathRcts_id_ix on :SCHEMA.PathwayReactions (reaction_id, reaction_source_id, enzyme, expasy_url, ext_db_name) ; diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql index bd8aaf4411..11d4fa447f 100644 --- a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql +++ b/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.chpgene_geneid_idx ON :SCHEMA.ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id) + create index 
chpgene_geneid_idx ON :SCHEMA.ChIPchipTranscript (protocol_app_node_id, source_id, gene_source_id) ; diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql index 9d0e711555..4f989590a9 100644 --- a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql +++ b/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql @@ -1,9 +1,9 @@ - CREATE INDEX :SCHEMA.ChrCN_ix + CREATE ChrCN_ix ON :SCHEMA.ChrCopyNumbers (input_pan_id, na_sequence_id) ; - CREATE INDEX :SCHEMA.ChrCN_output + CREATE ChrCN_output ON :SCHEMA.ChrCopyNumbers (output_pan_id) ; diff --git a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql index 7d3114121f..e580f236fd 100644 --- a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/CodingSequence_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.CodSeq_ix on :SCHEMA.CodingSequence (source_id, project_id) + create index CodSeq_ix on :SCHEMA.CodingSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql index f4babf6466..9238b045e6 100644 --- a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql +++ b/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.eqtlSpan_ix + create index eqtlSpan_ix on :SCHEMA.eqtlSpan (gene_source_id, project_id, hapblock_id, sequence_id, start_min, end_max, start_max, end_min, organism, lod_score) ; diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql index 6dec9178b5..adac5ecebd 100644 --- a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql +++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql @@ -1,7 +1,7 @@ create index EstSumm_libOverlap_ix - ON EstAlignmentGeneSummary + ON :SCHEMA.EstAlignmentGeneSummary (library_id, percent_identity, is_consistent, est_gene_overlap_length, 
percent_est_bases_aligned) @@ -11,7 +11,7 @@ create index EstSumm_estSite_ix - ON EstAlignmentGeneSummary + ON :SCHEMA.EstAlignmentGeneSummary (target_sequence_source_id, target_start, target_end, library_id) diff --git a/Model/lib/psql/webtables/MO/EstAttributes_ix.psql b/Model/lib/psql/webtables/MO/EstAttributes_ix.psql index 3708681405..eed35a9960 100644 --- a/Model/lib/psql/webtables/MO/EstAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/EstAttributes_ix.psql @@ -1,6 +1,6 @@ - create unique index EstAttr_source_id ON EstAttributes (source_id) + create unique index EstAttr_source_id ON :SCHEMA.EstAttributes (source_id) ; diff --git a/Model/lib/psql/webtables/MO/EstSequence_ix.psql b/Model/lib/psql/webtables/MO/EstSequence_ix.psql index de699f5486..d124b417bc 100644 --- a/Model/lib/psql/webtables/MO/EstSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/EstSequence_ix.psql @@ -1,3 +1,3 @@ - create index EstSeq_ix on EstSequence (source_id, project_id) + create index EstSeq_ix on :SCHEMA.EstSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql index bd524c4b7a..9f45a0abad 100644 --- a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql @@ -1,47 +1,47 @@ - CREATE UNIQUE INDEX :SCHEMA.GeneAttr_srcPrj + CREATE UNIQUE GeneAttr_srcPrj ON :SCHEMA.GeneAttributes (source_id) ; - CREATE INDEX :SCHEMA.GeneAttr_exon_ix + CREATE GeneAttr_exon_ix ON :SCHEMA.GeneAttributes (exon_count, source_id, project_id) ; - CREATE INDEX :SCHEMA.GeneAttr_loc_ix + CREATE GeneAttr_loc_ix ON :SCHEMA.GeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated) ; - CREATE INDEX :SCHEMA.GeneAttr_feat_ix + CREATE GeneAttr_feat_ix ON :SCHEMA.GeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed) ; - CREATE INDEX :SCHEMA.GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes ( + CREATE 
GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes ( orthomcl_name, source_id, taxon_id, gene_type, na_feature_id, na_sequence_id, start_min, end_max, organism, species, product, project_id ) ; - CREATE INDEX :SCHEMA.GeneAttr_ortholog_ix + CREATE GeneAttr_ortholog_ix ON :SCHEMA.GeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id) ; - CREATE INDEX :SCHEMA.GeneAttr_orgsrc_ix + CREATE GeneAttr_orgsrc_ix ON :SCHEMA.GeneAttributes (organism, source_id, na_sequence_id, start_min, end_max) ; - CREATE INDEX :SCHEMA.GeneAttr_prjsrc_ix + CREATE GeneAttr_prjsrc_ix ON :SCHEMA.GeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0)) ; - CREATE INDEX :SCHEMA.GeneAttr_txid_ix + CREATE GeneAttr_txid_ix ON :SCHEMA.GeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id) ; - CREATE INDEX :SCHEMA.GeneAttr_ids_ix + CREATE GeneAttr_ids_ix ON :SCHEMA.GeneAttributes (na_feature_id, source_id, project_id) ; - CREATE INDEX :SCHEMA.GeneAttr_loc_intjunc_ix + CREATE GeneAttr_loc_intjunc_ix ON :SCHEMA.GeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX) ; diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql index 4cb38aaa7f..084742ec07 100644 --- a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql @@ -1,4 +1,4 @@ - CREATE INDEX :SCHEMA.GeneCN_ix + CREATE INDEX GeneCN_ix ON :SCHEMA.GeneCopyNumbers (input_pan_id, na_sequence_id) ; diff --git a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql index a2ef5c49ec..a065517030 100644 --- a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.ggtab_ix ON :SCHEMA.GeneGoTable + create ggtab_ix ON :SCHEMA.GeneGoTable (source_id, project_id, go_id, transcript_ids, is_not, go_term_name, ontology, source, 
evidence_code, reference, evidence_code_parameter, sort_key) ; diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql index c295007639..89389667a0 100644 --- a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id) + create GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id) ; diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql index 14c8fdbd4a..3c0cff5069 100644 --- a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql @@ -1,10 +1,10 @@ - create index :SCHEMA.gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed) + create gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed) ; - create index :SCHEMA.gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id) + create gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id) ; - create index :SCHEMA.gijnew_txnloc_ix + create gijnew_txnloc_ix on :SCHEMA.GeneIntronJunction (taxon_id, na_sequence_id, segment_start, segment_end, is_reversed, total_unique, total_isrpm, annotated_intron) diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql index 927c41fb89..9bd71417ae 100644 --- a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql @@ -1,3 +1,3 @@ - CREATE INDEX :SCHEMA.GnMxIntGIJ_ix on :SCHEMA.GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id) + CREATE INDEX GnMxIntGIJ_ix on :SCHEMA.GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id) ; diff --git a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql 
b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql index 25fc836319..e4388deb01 100644 --- a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql +++ b/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.gmd_ix + create gmd_ix on :SCHEMA.GeneModelDump (source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids) ; diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql index 709e8d2934..039749930e 100644 --- a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql @@ -1,11 +1,11 @@ - create unique index :SCHEMA.pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id) + create unique pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id) ; - create unique index :SCHEMA.SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id) + create unique SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id) ; - create unique index :SCHEMA.SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id) + create unique SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id) ; - create unique index :SCHEMA.SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id) + create unique SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id) ; diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql index 093edf9acb..0cfa5601ed 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql @@ -1,9 +1,9 @@ - CREATE INDEX :SCHEMA.GenSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id) + CREATE INDEX genSeqId_sequence_idx ON :SCHEMA.GenomicSequenceId (sequence, id) ; - CREATE INDEX :SCHEMA.GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence) + CREATE INDEX 
GenSeqId_id_idx ON :SCHEMA.GenomicSequenceId (id, sequence) ; - CREATE INDEX :SCHEMA.GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence) + CREATE INDEX GenSeqId_lowid_idx ON :SCHEMA.GenomicSequenceId (lower(id), sequence) ; diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql index ea41f21285..497278e368 100644 --- a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id) + create GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql index 0b0ef12e90..8c5134b475 100644 --- a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql +++ b/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql @@ -1,7 +1,7 @@ - create index :SCHEMA.GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source) + create GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source) ; - create index :SCHEMA.GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary + create GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary (ontology, gene_source_id, is_not, is_go_slim, go_id, go_term_name, evidence_code, evidence_category) ; diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql index 443efe39c8..4245fce828 100644 --- a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql +++ b/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id) + create namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id) ; diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql 
b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql index b018637f7d..d1f420c0df 100644 --- a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql @@ -1,3 +1,3 @@ -create unique index :SCHEMA.Organism_sourceId_idx ON :SCHEMA.OrganismAttributes (source_id) +create unique Organism_sourceId_idx ON :SCHEMA.OrganismAttributes (source_id) ; diff --git a/Model/lib/psql/webtables/MO/PANIO_ix.psql b/Model/lib/psql/webtables/MO/PANIO_ix.psql index 90176e14d7..9bbb34ec21 100644 --- a/Model/lib/psql/webtables/MO/PANIO_ix.psql +++ b/Model/lib/psql/webtables/MO/PANIO_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.painio2_iix on :SCHEMA.PANIO + create painio2_iix on :SCHEMA.PANIO (input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id) @@ -6,7 +6,7 @@ - create index :SCHEMA.painio2_oix on :SCHEMA.PANIO + create painio2_oix on :SCHEMA.PANIO (output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id) @@ -14,7 +14,7 @@ - create index :SCHEMA.painio2_otypeix on :SCHEMA.PANIO + create painio2_otypeix on :SCHEMA.PANIO (output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id) @@ -22,7 +22,7 @@ - create index :SCHEMA.painio2_itypeix on :SCHEMA.PANIO + create painio2_itypeix on :SCHEMA.PANIO (input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id) diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql index 2cc01784ca..40750a7f84 100644 --- a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql +++ b/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.pgt_ix on :SCHEMA.PathwaysGeneTable + create pgt_ix on :SCHEMA.PathwaysGeneTable (gene_source_id, project_id, pathway_source_id, pathway_name, reactions, enzyme, expasy_url, pathway_source, 
exact_match) diff --git a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql index fceecb4adf..d59cf5e21b 100644 --- a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql +++ b/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql @@ -1,11 +1,11 @@ - create :SCHEMA.index psamp_ix + create index psamp_ix on :SCHEMA.ProfileSamples (dataset_name, profile_type, study_id, node_order_num, protocol_app_node_id, profile_set_suffix, study_name, node_type, protocol_app_node_name) ; - create index :SCHEMA.psampstdy_ix + create index psampstdy_ix on :SCHEMA.ProfileSamples (study_name, node_type, profile_type, node_order_num, protocol_app_node_id, profile_set_suffix, study_id, diff --git a/Model/lib/psql/webtables/MO/Profile_ix.psql b/Model/lib/psql/webtables/MO/Profile_ix.psql index e4aebd5c31..46b746ade0 100644 --- a/Model/lib/psql/webtables/MO/Profile_ix.psql +++ b/Model/lib/psql/webtables/MO/Profile_ix.psql @@ -1,12 +1,12 @@ - create index :SCHEMA.exprof_idx + create exprof_idx on :SCHEMA.Profile (source_id, profile_type, profile_set_name) ; - create index :SCHEMA.profset_idx + create profset_idx on :SCHEMA.Profile (profile_set_name, profile_type) ; - create index :SCHEMA.srcdset_idx + create srcdset_idx on :SCHEMA.Profile (source_id, dataset_subtype, dataset_type) ; diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql index 2faff407d3..3abf032197 100644 --- a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql @@ -1,6 +1,6 @@ - CREATE INDEX :SCHEMA.PA_sourceId ON :SCHEMA.ProteinAttributes (source_id) + CREATE INDEX PA_sourceId ON :SCHEMA.ProteinAttributes (source_id) ; - CREATE INDEX :SCHEMA.PA_aaSequenceId ON :SCHEMA.ProteinAttributes (aa_sequence_id) + CREATE INDEX PA_aaSequenceId ON :SCHEMA.ProteinAttributes (aa_sequence_id) ; diff --git a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql 
b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql index a11708b620..8dad2f7481 100644 --- a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.ProtSeq_ix on :SCHEMA._ORG_ABBREVProteinSequence (source_id, project_id) + create index ProtSeq_ix on :SCHEMA._ORG_ABBREVProteinSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql b/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql index 9a55de605a..255767b473 100644 --- a/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql +++ b/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql @@ -1,7 +1,7 @@ - CREATE INDEX :SCHEMA.SequenceEnzymeClass_idx1 ON :SCHEMA.SequenceEnzymeClass (group_name, ec_number, description) + CREATE INDEX SequenceEnzymeClass_idx1 ON :SCHEMA.SequenceEnzymeClass (group_name, ec_number, description) ; - CREATE INDEX :SCHEMA.SequenceEnzymeClass_idx2 ON :SCHEMA.SequenceEnzymeClass (full_id, ec_number, description) + CREATE INDEX SequenceEnzymeClass_idx2 ON :SCHEMA.SequenceEnzymeClass (full_id, ec_number, description) ; diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql index 5cd5d15432..404e82cc02 100644 --- a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql +++ b/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql @@ -1,8 +1,8 @@ - CREATE INDEX :SCHEMA.SignalP1_ix + CREATE INDEX SignalP1_ix ON :SCHEMA.SignalPeptideDomains (aa_sequence_id) ; - CREATE INDEX :SCHEMA.SignalP2_ix + CREATE INDEX SignalP2_ix ON :SCHEMA.SignalPeptideDomains (gene_source_id, transcript_source_id, end_max) ; diff --git a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql index 5acc3e9f9b..7eee68e1d4 100644 --- a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql +++ b/Model/lib/psql/webtables/MO/TFBSGene_ix.psql @@ -1,6 +1,6 @@ - create index 
:SCHEMA.tfbs_geneid_idx ON :SCHEMA.TFBSGene (gene_source_id, tfbs_na_feature_id) + create index tfbs_geneid_idx ON :SCHEMA.TFBSGene (gene_source_id, tfbs_na_feature_id) ; - create index :SCHEMA.geneid_tfbs_idx ON :SCHEMA.TFBSGene (tfbs_na_feature_id,gene_source_id) + create index geneid_tfbs_idx ON :SCHEMA.TFBSGene (tfbs_na_feature_id,gene_source_id) ; diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql index 7b29c4aa03..e7005f8319 100644 --- a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql +++ b/Model/lib/psql/webtables/MO/Taxonomy_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.tax_ix + create index tax_ix on :SCHEMA.Taxonomy (organism, ordernum, taxon_id, parent_id, ncbi_tax_id, name, rank) ; diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql index 08b54452aa..dd1670b4a3 100644 --- a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql @@ -1,55 +1,55 @@ - CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_sourceId - ON TranscriptAttributes (source_id) + CREATE UNIQUE INDEX TranscriptAttr_sourceId + ON :SCHEMA.TranscriptAttributes (source_id) ; - CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_srcPrj - ON TranscriptAttributes (source_id, gene_source_id, project_id) + CREATE UNIQUE INDEX TranscriptAttr_srcPrj + ON :SCHEMA.TranscriptAttributes (source_id, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_genesrc - ON TranscriptAttributes (gene_source_id, source_id, project_id) + CREATE UNIQUE INDEX TranscriptAttr_genesrc + ON :SCHEMA.TranscriptAttributes (gene_source_id, source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_exon_ix - ON TranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id) + CREATE UNIQUE INDEX TranscriptAttr_exon_ix + ON :SCHEMA.TranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id) 
; - CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_loc_ix + CREATE UNIQUE INDEX TranscriptAttr_loc_ix ON :SCHEMA.TranscriptAttributes (na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id, is_deprecated, source_id, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_feat_ix + CREATE UNIQUE INDEX TranscriptAttr_feat_ix ON :SCHEMA.TranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_geneid_ix + CREATE UNIQUE INDEX TranscriptAttr_geneid_ix ON :SCHEMA.TranscriptAttributes (gene_id, source_id, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.TransAttr_orthoname_ix + CREATE UNIQUE INDEX TransAttr_orthoname_ix ON :SCHEMA.TranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id) ; - CREATE UNIQUE INDEX :SCHEMA.TransAttr_molwt_ix + CREATE UNIQUE INDEX TransAttr_molwt_ix ON :SCHEMA.TranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id) ; - CREATE INDEX :SCHEMA.TransAttr_ortholog_ix + CREATE INDEX TransAttr_ortholog_ix ON :SCHEMA.TranscriptAttributes (source_id, na_sequence_id, gene_start_min, gene_end_max, orthomcl_name, gene_source_id, project_id) ; - CREATE INDEX :SCHEMA.TransAttr_orgsrc_ix + CREATE INDEX TransAttr_orgsrc_ix ON :SCHEMA.TranscriptAttributes (organism, source_id, sequence_id, gene_start_min, gene_end_max) ; - CREATE INDEX :SCHEMA.TransAttr_lwrsrc_ix + CREATE INDEX TransAttr_lwrsrc_ix ON :SCHEMA.TranscriptAttributes (lower(source_id), gene_source_id, project_id, source_id) ; - CREATE INDEX :SCHEMA.TransAttr_species_ix + CREATE INDEX TransAttr_species_ix ON :SCHEMA.TranscriptAttributes (species, source_id, gene_id, gene_source_id, project_id) ; @@ -60,11 +60,11 @@ five_prime_utr_length, three_prime_utr_length) ; - CREATE UNIQUE INDEX :SCHEMA.TranscriptAttr_genenaf + CREATE UNIQUE INDEX TranscriptAttr_genenaf ON :SCHEMA.TranscriptAttributes (gene_na_feature_id, 
gene_source_id, source_id, project_id) ; - CREATE INDEX :SCHEMA.TransAttr_locsIds_ix + CREATE INDEX TransAttr_locsIds_ix ON :SCHEMA.TranscriptAttributes (na_sequence_id, start_min, end_max, is_reversed, gene_source_id, source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql index ad1c71a2fc..25e656ced9 100644 --- a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.GCent_loc_ix + create index GCent_loc_ix on :SCHEMA.TranscriptCenDistance (genomic_sequence, centromere_distance) ; diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql index 2b05b42f44..92f77af43c 100644 --- a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql @@ -1,11 +1,11 @@ - create index :SCHEMA.TranscriptPath_ix + create index TranscriptPath_ix on :SCHEMA.TranscriptPathway (gene_source_id, source_id, pathway_source_id, pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway, ec_number_pathway, pathway_source) ; - create index :SCHEMA.TranscriptPathSource_ix + create index TranscriptPathSource_ix on :SCHEMA.TranscriptPathway (pathway_source, gene_source_id, source_id) ; diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql index 1e1b36617d..98bb3137f9 100644 --- a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql +++ b/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql @@ -1,3 +1,3 @@ - create index :SCHEMA.XScriptSeq_ix on :SCHEMA.TranscriptSequence (source_id, project_id) + create index XScriptSeq_ix on :SCHEMA.TranscriptSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql 
b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql index 613aff3e51..964f234217 100644 --- a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql +++ b/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql @@ -1,4 +1,4 @@ - create index :SCHEMA.TransDom1_ix + create index TransDom1_ix on :SCHEMA.TransmembraneDomains (tmf_aa_sequence_id, tmf_aa_feature_id, tmf_start_min, tmf_end_max, tmf_topology) ; From f7441155e93c35c64c1f47ebd957e27e63987911 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Mon, 19 May 2025 13:57:49 -0400 Subject: [PATCH 024/112] drop table and clean Ks --- .../{MO => K}/DatasetExampleSourceId.psql | 0 .../{MO => K}/DatasetExampleSourceId_ix.psql | 0 .../{MO => K}/OrganismAbbreviationBlast.psql | 0 .../OrganismAbbreviationBlast_ix.psql | 0 .../{MO => K}/OrganismAttributes.psql | 0 .../{MO => K}/OrganismAttributes_ix.psql | 0 .../lib/psql/webtables/{MO => K}/Profile.psql | 0 .../webtables/{MO => K}/ProfileSamples.psql | 0 .../{MO => K}/ProfileSamples_ix.psql | 0 .../psql/webtables/{MO => K}/ProfileType.psql | 0 .../webtables/{MO => K}/ProfileType_ix.psql | 0 .../psql/webtables/{MO => K}/Profile_ix.psql | 0 .../lib/psql/webtables/MG/OntologyLevels.psql | 6 ++++- .../psql/webtables/MG/PathwayAttributes.psql | 4 ++- Model/lib/psql/webtables/MG/PathwayNodes.psql | 26 +++++++++++++++---- .../webtables/MO/EstAlignmentGeneSummary.psql | 4 +-- Model/lib/psql/webtables/MO/GeneId.psql | 2 ++ .../lib/psql/webtables/MO/NameMappingGIJ.psql | 8 +++--- .../psql/webtables/MO/ProteinAttributes.psql | 7 ++--- .../webtables/MO/SequencePieceClosure.psql | 3 ++- .../webtables/MO/TranscriptAttributes.psql | 4 +-- 21 files changed, 45 insertions(+), 19 deletions(-) rename Model/lib/psql/webtables/{MO => K}/DatasetExampleSourceId.psql (100%) rename Model/lib/psql/webtables/{MO => K}/DatasetExampleSourceId_ix.psql (100%) rename Model/lib/psql/webtables/{MO => K}/OrganismAbbreviationBlast.psql (100%) rename Model/lib/psql/webtables/{MO => 
K}/OrganismAbbreviationBlast_ix.psql (100%) rename Model/lib/psql/webtables/{MO => K}/OrganismAttributes.psql (100%) rename Model/lib/psql/webtables/{MO => K}/OrganismAttributes_ix.psql (100%) rename Model/lib/psql/webtables/{MO => K}/Profile.psql (100%) rename Model/lib/psql/webtables/{MO => K}/ProfileSamples.psql (100%) rename Model/lib/psql/webtables/{MO => K}/ProfileSamples_ix.psql (100%) rename Model/lib/psql/webtables/{MO => K}/ProfileType.psql (100%) rename Model/lib/psql/webtables/{MO => K}/ProfileType_ix.psql (100%) rename Model/lib/psql/webtables/{MO => K}/Profile_ix.psql (100%) diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql b/Model/lib/psql/webtables/K/DatasetExampleSourceId.psql similarity index 100% rename from Model/lib/psql/webtables/MO/DatasetExampleSourceId.psql rename to Model/lib/psql/webtables/K/DatasetExampleSourceId.psql diff --git a/Model/lib/psql/webtables/MO/DatasetExampleSourceId_ix.psql b/Model/lib/psql/webtables/K/DatasetExampleSourceId_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/DatasetExampleSourceId_ix.psql rename to Model/lib/psql/webtables/K/DatasetExampleSourceId_ix.psql diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql b/Model/lib/psql/webtables/K/OrganismAbbreviationBlast.psql similarity index 100% rename from Model/lib/psql/webtables/MO/OrganismAbbreviationBlast.psql rename to Model/lib/psql/webtables/K/OrganismAbbreviationBlast.psql diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviationBlast_ix.psql b/Model/lib/psql/webtables/K/OrganismAbbreviationBlast_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/OrganismAbbreviationBlast_ix.psql rename to Model/lib/psql/webtables/K/OrganismAbbreviationBlast_ix.psql diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes.psql b/Model/lib/psql/webtables/K/OrganismAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/MO/OrganismAttributes.psql rename to 
Model/lib/psql/webtables/K/OrganismAttributes.psql diff --git a/Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql b/Model/lib/psql/webtables/K/OrganismAttributes_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/OrganismAttributes_ix.psql rename to Model/lib/psql/webtables/K/OrganismAttributes_ix.psql diff --git a/Model/lib/psql/webtables/MO/Profile.psql b/Model/lib/psql/webtables/K/Profile.psql similarity index 100% rename from Model/lib/psql/webtables/MO/Profile.psql rename to Model/lib/psql/webtables/K/Profile.psql diff --git a/Model/lib/psql/webtables/MO/ProfileSamples.psql b/Model/lib/psql/webtables/K/ProfileSamples.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ProfileSamples.psql rename to Model/lib/psql/webtables/K/ProfileSamples.psql diff --git a/Model/lib/psql/webtables/MO/ProfileSamples_ix.psql b/Model/lib/psql/webtables/K/ProfileSamples_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ProfileSamples_ix.psql rename to Model/lib/psql/webtables/K/ProfileSamples_ix.psql diff --git a/Model/lib/psql/webtables/MO/ProfileType.psql b/Model/lib/psql/webtables/K/ProfileType.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ProfileType.psql rename to Model/lib/psql/webtables/K/ProfileType.psql diff --git a/Model/lib/psql/webtables/MO/ProfileType_ix.psql b/Model/lib/psql/webtables/K/ProfileType_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ProfileType_ix.psql rename to Model/lib/psql/webtables/K/ProfileType_ix.psql diff --git a/Model/lib/psql/webtables/MO/Profile_ix.psql b/Model/lib/psql/webtables/K/Profile_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/Profile_ix.psql rename to Model/lib/psql/webtables/K/Profile_ix.psql diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webtables/MG/OntologyLevels.psql index 47df6e98e2..2a80ec0fb4 100644 --- a/Model/lib/psql/webtables/MG/OntologyLevels.psql +++ 
b/Model/lib/psql/webtables/MG/OntologyLevels.psql @@ -1,10 +1,14 @@ - CREATE UNLOGGED TABLE :SCHEMA.Is_a_links AS + DROP TABLE IF EXISTS :SCHEMA.Is_a_links; + + CREATE UNLOGGED TABLE :SCHEMA.Is_a_links AS SELECT subject_term_id, object_term_id FROM sres.OntologyRelationship rel, sres.OntologyTerm pred WHERE rel.predicate_term_id = pred.ontology_term_id AND pred.name = 'is_a' ; + DROP TABLE IF EXISTS :SCHEMA.Roots; + CREATE UNLOGGED TABLE :SCHEMA.Roots AS SELECT object_term_id FROM :SCHEMA.is_a_links EXCEPT diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes.psql b/Model/lib/psql/webtables/MG/PathwayAttributes.psql index e976027524..5c8567fffe 100644 --- a/Model/lib/psql/webtables/MG/PathwayAttributes.psql +++ b/Model/lib/psql/webtables/MG/PathwayAttributes.psql @@ -1,4 +1,6 @@ - CREATE TABLE :SCHEMA.PathwayAttributes as + drop table if exists :SCHEMA.PathwayAttributes; + + CREATE TABLE :SCHEMA.PathwayAttributes as SELECT p.source_id , p.pathway_id diff --git a/Model/lib/psql/webtables/MG/PathwayNodes.psql b/Model/lib/psql/webtables/MG/PathwayNodes.psql index 7b8740ca5b..87a255e6ae 100644 --- a/Model/lib/psql/webtables/MG/PathwayNodes.psql +++ b/Model/lib/psql/webtables/MG/PathwayNodes.psql @@ -1,4 +1,6 @@ - CREATE UNLOGGED TABLE :SCHEMA.NodesWithTypes AS + DROP TABLE IF EXISTS :SCHEMA.NodesWithTypes; + + CREATE UNLOGGED TABLE :SCHEMA.NodesWithTypes AS SELECT pn.pathway_id , CASE WHEN pa.name IS NOT NULL THEN pa.name ELSE pn.display_label END AS display_label , pa.url @@ -83,7 +85,9 @@ WHERE ot.name = 'molecular entity' ; - CREATE UNLOGGED TABLE :SCHEMA.ReactionsWithReversibility AS + DROP TABLE IF EXISTS :SCHEMA.ReactionsWithReversibility; + + CREATE UNLOGGED TABLE :SCHEMA.ReactionsWithReversibility AS SELECT DISTINCT spr.pathway_relationship_id , tpr.is_reversible , tpr.reaction_source_id @@ -94,7 +98,9 @@ AND tpr.reaction_id = prr.pathway_reaction_id ; - CREATE UNLOGGED TABLE :SCHEMA.EnzymeEdges AS + DROP TABLE IF EXISTS :SCHEMA.EnzymeEdges; + + CREATE 
UNLOGGED TABLE :SCHEMA.EnzymeEdges AS SELECT DISTINCT nwt.pathway_id AS pathway_id , nwt.pathway_node_id AS e_id , nwt.type @@ -117,7 +123,9 @@ AND rri.reaction_source_id = rro.reaction_source_id ; - CREATE UNLOGGED TABLE :SCHEMA.ParentNodes AS + DROP TABLE IF EXISTS :SCHEMA.ParentNodes; + + CREATE UNLOGGED TABLE :SCHEMA.ParentNodes AS WITH AllEnzymeEdges AS ( SELECT string_agg(io, ',' ORDER BY io) AS all_edges , e_id @@ -141,6 +149,8 @@ WHERE aee.all_edges = pn.all_edges ; + DROP TABLE IF EXISTS :SCHEMA.NodesWithParents; + CREATE UNLOGGED TABLE :SCHEMA.NodesWithParents AS SELECT DISTINCT ee.e_id AS pathway_node_id , pn.parent @@ -152,6 +162,8 @@ AND ee.e_id = pn.e_id ; + DROP TABLE IF EXISTS :SCHEMA.EnzymeReactions; + CREATE UNLOGGED TABLE :SCHEMA.EnzymeReactions AS SELECT DISTINCT pn.PATHWAY_NODE_ID node_id , pr.SOURCE_ID AS reaction_source_id @@ -167,6 +179,8 @@ AND pn.PATHWAY_NODE_TYPE_ID = ot.ONTOLOGY_TERM_ID ; + DROP TABLE IF EXISTS :SCHEMA.ParentsForEdges; + CREATE UNLOGGED TABLE :SCHEMA.ParentsForEdges AS SELECT ee.e_id , ee.m1_id @@ -179,7 +193,9 @@ WHERE ee.e_id = np.pathway_node_id ; - CREATE TABLE :SCHEMA.PathwayEdges AS + DROP TABLE IF EXISTS :SCHEMA.PathwayEdges; + + CREATE UNLOGGED TABLE :SCHEMA.PathwayEdges AS SELECT pa.source_id , pa.pathway_source , rel.* diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql index 4dbff1eca8..dc2936487b 100644 --- a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql +++ b/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql @@ -1,4 +1,4 @@ - + drop table if exists :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp; CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp AS SELECT ba.blat_alignment_id, ba.query_na_sequence_id, e.accession, @@ -24,7 +24,7 @@ ; - + drop table if exists :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp; CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVEstAlignmentNoGeneTmp AS SELECT * from 
:SCHEMA.:ORG_ABBREVEstAlignmentGeneTmp WHERE 1=0 UNION /* define datatype for null column */ diff --git a/Model/lib/psql/webtables/MO/GeneId.psql b/Model/lib/psql/webtables/MO/GeneId.psql index 7feaf7ebc2..75b1bfdab5 100644 --- a/Model/lib/psql/webtables/MO/GeneId.psql +++ b/Model/lib/psql/webtables/MO/GeneId.psql @@ -1,3 +1,5 @@ +drop table if exists :SCHEMA.:ORG_ABBREVGeneFeatureTmp; + create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureTmp as (select gf.na_feature_id , gf.na_sequence_id diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ.psql b/Model/lib/psql/webtables/MO/NameMappingGIJ.psql index edc7d78f8e..678512b373 100644 --- a/Model/lib/psql/webtables/MO/NameMappingGIJ.psql +++ b/Model/lib/psql/webtables/MO/NameMappingGIJ.psql @@ -1,6 +1,6 @@ - + DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVJunExpGIJtmp; - CREATE UNLOGGED TABLE JunExpGIJtmp AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVJunExpGIJtmp AS WITH ij AS ( SELECT pj.output_pan_id as junctions_pan_id, p.output_pan_id as expression_pan_id, avg(nafe.value) as avg_value,pan.name as exp_name, regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name @@ -47,9 +47,9 @@ ; - + drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; - CREATE UNLOGGED TABLE MappingStatsGIJtmp ( + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( junctions_pan_id, read_length, mapped_reads, diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webtables/MO/ProteinAttributes.psql index 645a1edfae..a5cfad9f82 100644 --- a/Model/lib/psql/webtables/MO/ProteinAttributes.psql +++ b/Model/lib/psql/webtables/MO/ProteinAttributes.psql @@ -1,4 +1,4 @@ - + DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVGoTermList_tmp; CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGoTermList_tmp AS SELECT aa_sequence_id, ontology, source, @@ -15,7 +15,7 @@ ; - + DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp; CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp AS SELECT DISTINCT 
gts.aa_sequence_id, @@ -85,7 +85,7 @@ ; - + DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp; CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp AS SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers @@ -101,6 +101,7 @@ ; + DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp; --TODO: these rows will not exist in org specific land -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp AS diff --git a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql index a7fc7c39b4..6da04c90fb 100644 --- a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql +++ b/Model/lib/psql/webtables/MO/SequencePieceClosure.psql @@ -1,4 +1,5 @@ - CREATE TABLE :ORG_ABBREVSequencePieceClosure AS + + CREATE TABLE :ORG_ABBREVSequencePieceClosure AS SELECT sp.sequence_piece_id, sp.virtual_na_sequence_id, sp.piece_na_sequence_id, diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql index e82dea4d5b..cc70739339 100644 --- a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql +++ b/Model/lib/psql/webtables/MO/TranscriptAttributes.psql @@ -1,6 +1,6 @@ -\ + DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp; - CREATE table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp AS + CREATE UNLOGGED table :SCHEMA.:ORG_ABBREVTranscriptUniprot_tmp AS select na_feature_id, substr(string_agg(uniprot_id, ',' order by uniprot_id), 1, 240) as uniprot_id, substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal From 02d596663486e2122959167e24ac27c9047ee145 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Mon, 19 May 2025 18:32:08 -0400 Subject: [PATCH 025/112] rename webready folders --- Model/lib/psql/{webtables => webready}/UK/AssociatedDataset.psql | 0 Model/lib/psql/{webtables => webready}/UK/DomainAssignment.psql | 0 Model/lib/psql/{webtables => 
webready}/UK/EdaGeneGraph.psql | 0 Model/lib/psql/{webtables => webready}/UK/PANResults.psql | 0 Model/lib/psql/{webtables => webready}/UK/ProjectTaxon.psql | 0 Model/lib/psql/{webtables => webready}/UK/TypeAheadCounts.psql | 0 .../{webtables/UK => webready/comparative}/AlphaFoldGenes.psql | 0 .../UK => webready/comparative}/GroupPhylogeneticProfile.psql | 0 .../UK => webready/comparative}/OrthologousTranscripts.psql | 0 .../{webtables/UK => webready/comparative}/PhyleticPattern.psql | 0 .../{webtables/MG => webready/global}/CompoundAttributes.psql | 0 .../{webtables/MG => webready/global}/CompoundAttributes_ix.psql | 0 Model/lib/psql/{webtables/MG => webready/global}/CompoundId.psql | 0 .../lib/psql/{webtables/MG => webready/global}/CompoundId_ix.psql | 0 .../{webtables/MG => webready/global}/CompoundProperties.psql | 0 .../{webtables/MG => webready/global}/CompoundTypeAheads.psql | 0 .../{webtables/MG => webready/global}/GroupDomainAttribute.psql | 0 .../MG => webready/global}/GroupDomainAttribute_ix.psql | 0 .../psql/{webtables/MG => webready/global}/OntologyLevels.psql | 0 .../psql/{webtables/MG => webready/global}/OntologyLevels_ix.psql | 0 .../psql/{webtables/MG => webready/global}/PathwayAttributes.psql | 0 .../{webtables/MG => webready/global}/PathwayAttributes_ix.psql | 0 .../psql/{webtables/MG => webready/global}/PathwayCompounds.psql | 0 .../{webtables/MG => webready/global}/PathwayCompounds_ix.psql | 0 .../lib/psql/{webtables/MG => webready/global}/PathwayNodes.psql | 0 .../psql/{webtables/MG => webready/global}/PathwayReactions.psql | 0 .../{webtables/MG => webready/global}/PathwayReactions_ix.psql | 0 .../{webtables/UK => webready/global}/SequenceAttributes.psql | 0 .../{webtables/K => webready/keep}/DatasetExampleSourceId.psql | 0 .../{webtables/K => webready/keep}/DatasetExampleSourceId_ix.psql | 0 .../{webtables/K => webready/keep}/OrganismAbbreviationBlast.psql | 0 .../K => webready/keep}/OrganismAbbreviationBlast_ix.psql | 0 .../psql/{webtables/K 
=> webready/keep}/OrganismAttributes.psql | 0 .../{webtables/K => webready/keep}/OrganismAttributes_ix.psql | 0 Model/lib/psql/{webtables/K => webready/keep}/Profile.psql | 0 Model/lib/psql/{webtables/K => webready/keep}/ProfileSamples.psql | 0 .../psql/{webtables/K => webready/keep}/ProfileSamples_ix.psql | 0 Model/lib/psql/{webtables/K => webready/keep}/ProfileType.psql | 0 Model/lib/psql/{webtables/K => webready/keep}/ProfileType_ix.psql | 0 Model/lib/psql/{webtables/K => webready/keep}/Profile_ix.psql | 0 .../MO => webready/orgSpecific}/ChIPchipTranscript.psql | 0 .../MO => webready/orgSpecific}/ChIPchipTranscript_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/ChrCopyNumbers.psql | 0 .../{webtables/MO => webready/orgSpecific}/ChrCopyNumbers_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/CodingSequence.psql | 0 .../{webtables/MO => webready/orgSpecific}/CodingSequence_ix.psql | 0 .../lib/psql/{webtables/MO => webready/orgSpecific}/EqtlSpan.psql | 0 .../psql/{webtables/MO => webready/orgSpecific}/EqtlSpan_ix.psql | 0 .../MO => webready/orgSpecific}/EstAlignmentGeneSummary.psql | 0 .../MO => webready/orgSpecific}/EstAlignmentGeneSummary_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/EstAttributes.psql | 0 .../{webtables/MO => webready/orgSpecific}/EstAttributes_ix.psql | 0 .../psql/{webtables/MO => webready/orgSpecific}/EstSequence.psql | 0 .../{webtables/MO => webready/orgSpecific}/EstSequence_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneAttributes.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneAttributes_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneCopyNumbers.psql | 0 .../MO => webready/orgSpecific}/GeneCopyNumbers_ix.psql | 0 .../psql/{webtables/MO => webready/orgSpecific}/GeneGoTable.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneGoTable_ix.psql | 0 .../psql/{webtables/MO => webready/orgSpecific}/GeneGoTerms.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneGoTerms_ix.psql | 0 
Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneId.psql | 0 .../psql/{webtables/MO => webready/orgSpecific}/GeneId_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneIntJuncStats.psql | 0 .../MO => webready/orgSpecific}/GeneIntJuncStats_ix.psql | 0 .../MO => webready/orgSpecific}/GeneIntronJunction.psql | 0 .../MO => webready/orgSpecific}/GeneIntronJunction_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneLocations.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneLocations_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneMaxIntronGIJ.psql | 0 .../MO => webready/orgSpecific}/GeneMaxIntronGIJ_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneModelDump.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneModelDump_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/GeneSummaryFilter.psql | 0 .../MO => webready/orgSpecific}/GeneSummaryFilter_ix.psql | 0 .../MO => webready/orgSpecific}/GenomicSeqAttributes.psql | 0 .../MO => webready/orgSpecific}/GenomicSeqAttributes_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/GenomicSequenceId.psql | 0 .../MO => webready/orgSpecific}/GenomicSequenceId_ix.psql | 0 .../MO => webready/orgSpecific}/GenomicSequenceSequence.psql | 0 .../MO => webready/orgSpecific}/GenomicSequenceSequence_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/GoTermSummary.psql | 0 .../{webtables/MO => webready/orgSpecific}/GoTermSummary_ix.psql | 0 .../MO => webready/orgSpecific}/IntronSupportLevel.psql | 0 .../MO => webready/orgSpecific}/IntronSupportLevel_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/IntronUtrCoords.psql | 0 .../MO => webready/orgSpecific}/IntronUtrCoords_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/NameMappingGIJ.psql | 0 .../{webtables/MO => webready/orgSpecific}/NameMappingGIJ_ix.psql | 0 .../MO => webready/orgSpecific}/OrganismAbbreviation.psql | 0 .../MO => webready/orgSpecific}/OrganismAbbreviation_ix.psql | 0 .../MO => 
webready/orgSpecific}/OrganismSelectTaxonRank.psql | 0 .../MO => webready/orgSpecific}/OrganismSelectTaxonRank_ix.psql | 0 .../psql/{webtables/MO => webready/orgSpecific}/PANExtDbRls.psql | 0 Model/lib/psql/{webtables/MO => webready/orgSpecific}/PANIO.psql | 0 .../lib/psql/{webtables/MO => webready/orgSpecific}/PANIO_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/PathwayNodeGene.psql | 0 .../MO => webready/orgSpecific}/PathwayNodeGene_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/PathwaysGeneTable.psql | 0 .../MO => webready/orgSpecific}/PathwaysGeneTable_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/PdbSimilarity.psql | 0 .../{webtables/MO => webready/orgSpecific}/PdbSimilarity_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/ProteinAttributes.psql | 0 .../MO => webready/orgSpecific}/ProteinAttributes_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/ProteinSequence.psql | 0 .../MO => webready/orgSpecific}/ProteinSequence_ix.psql | 0 .../psql/{webtables/MO => webready/orgSpecific}/RnaSeqStats.psql | 0 .../{webtables/MO => webready/orgSpecific}/RnaSeqStats_ix.psql | 0 .../MO => webready/orgSpecific}/SequenceEnzymeClass.psql | 0 .../MO => webready/orgSpecific}/SequenceEnzymeClass_ix.psql | 0 .../MO => webready/orgSpecific}/SequencePieceClosure.psql | 0 .../MO => webready/orgSpecific}/SignalPeptideDomains.psql | 0 .../MO => webready/orgSpecific}/SignalPeptideDomains_ix.psql | 0 .../lib/psql/{webtables/MO => webready/orgSpecific}/TFBSGene.psql | 0 .../psql/{webtables/MO => webready/orgSpecific}/TFBSGene_ix.psql | 0 .../psql/{webtables/MO => webready/orgSpecific}/TaxonSpecies.psql | 0 .../{webtables/MO => webready/orgSpecific}/TaxonSpecies_ix.psql | 0 .../lib/psql/{webtables/MO => webready/orgSpecific}/Taxonomy.psql | 0 .../psql/{webtables/MO => webready/orgSpecific}/Taxonomy_ix.psql | 0 .../MO => webready/orgSpecific}/TranscriptAttributes.psql | 0 .../MO => webready/orgSpecific}/TranscriptAttributes_ix.psql | 0 .../MO => 
webready/orgSpecific}/TranscriptCenDistance.psql | 0 .../MO => webready/orgSpecific}/TranscriptCenDistance_ix.psql | 0 .../{webtables/MO => webready/orgSpecific}/TranscriptPathway.psql | 0 .../MO => webready/orgSpecific}/TranscriptPathway_ix.psql | 0 .../MO => webready/orgSpecific}/TranscriptSequence.psql | 0 .../MO => webready/orgSpecific}/TranscriptSequence_ix.psql | 0 .../MO => webready/orgSpecific}/TransmembraneDomains.psql | 0 .../MO => webready/orgSpecific}/TransmembraneDomains_ix.psql | 0 130 files changed, 0 insertions(+), 0 deletions(-) rename Model/lib/psql/{webtables => webready}/UK/AssociatedDataset.psql (100%) rename Model/lib/psql/{webtables => webready}/UK/DomainAssignment.psql (100%) rename Model/lib/psql/{webtables => webready}/UK/EdaGeneGraph.psql (100%) rename Model/lib/psql/{webtables => webready}/UK/PANResults.psql (100%) rename Model/lib/psql/{webtables => webready}/UK/ProjectTaxon.psql (100%) rename Model/lib/psql/{webtables => webready}/UK/TypeAheadCounts.psql (100%) rename Model/lib/psql/{webtables/UK => webready/comparative}/AlphaFoldGenes.psql (100%) rename Model/lib/psql/{webtables/UK => webready/comparative}/GroupPhylogeneticProfile.psql (100%) rename Model/lib/psql/{webtables/UK => webready/comparative}/OrthologousTranscripts.psql (100%) rename Model/lib/psql/{webtables/UK => webready/comparative}/PhyleticPattern.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/CompoundAttributes.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/CompoundAttributes_ix.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/CompoundId.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/CompoundId_ix.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/CompoundProperties.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/CompoundTypeAheads.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/GroupDomainAttribute.psql (100%) rename 
Model/lib/psql/{webtables/MG => webready/global}/GroupDomainAttribute_ix.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/OntologyLevels.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/OntologyLevels_ix.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/PathwayAttributes.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/PathwayAttributes_ix.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/PathwayCompounds.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/PathwayCompounds_ix.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/PathwayNodes.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/PathwayReactions.psql (100%) rename Model/lib/psql/{webtables/MG => webready/global}/PathwayReactions_ix.psql (100%) rename Model/lib/psql/{webtables/UK => webready/global}/SequenceAttributes.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/DatasetExampleSourceId.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/DatasetExampleSourceId_ix.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/OrganismAbbreviationBlast.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/OrganismAbbreviationBlast_ix.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/OrganismAttributes.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/OrganismAttributes_ix.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/Profile.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/ProfileSamples.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/ProfileSamples_ix.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/ProfileType.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/ProfileType_ix.psql (100%) rename Model/lib/psql/{webtables/K => webready/keep}/Profile_ix.psql (100%) rename Model/lib/psql/{webtables/MO => 
webready/orgSpecific}/ChIPchipTranscript.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ChIPchipTranscript_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ChrCopyNumbers.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ChrCopyNumbers_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/CodingSequence.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/CodingSequence_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EqtlSpan.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EqtlSpan_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstAlignmentGeneSummary.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstAlignmentGeneSummary_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstAttributes.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstAttributes_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstSequence.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/EstSequence_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneAttributes.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneAttributes_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneCopyNumbers.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneCopyNumbers_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneGoTable.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneGoTable_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneGoTerms.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneGoTerms_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneId.psql 
(100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneId_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneIntJuncStats.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneIntJuncStats_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneIntronJunction.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneIntronJunction_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneLocations.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneLocations_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneMaxIntronGIJ.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneMaxIntronGIJ_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneModelDump.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneModelDump_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneSummaryFilter.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GeneSummaryFilter_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSeqAttributes.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSeqAttributes_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSequenceId.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSequenceId_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSequenceSequence.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GenomicSequenceSequence_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GoTermSummary.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/GoTermSummary_ix.psql (100%) rename Model/lib/psql/{webtables/MO => 
webready/orgSpecific}/IntronSupportLevel.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/IntronSupportLevel_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/IntronUtrCoords.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/IntronUtrCoords_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/NameMappingGIJ.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/NameMappingGIJ_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/OrganismAbbreviation.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/OrganismAbbreviation_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/OrganismSelectTaxonRank.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/OrganismSelectTaxonRank_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PANExtDbRls.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PANIO.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PANIO_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PathwayNodeGene.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PathwayNodeGene_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PathwaysGeneTable.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PathwaysGeneTable_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PdbSimilarity.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/PdbSimilarity_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ProteinAttributes.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ProteinAttributes_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/ProteinSequence.psql (100%) rename Model/lib/psql/{webtables/MO => 
webready/orgSpecific}/ProteinSequence_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/RnaSeqStats.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/RnaSeqStats_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/SequenceEnzymeClass.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/SequenceEnzymeClass_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/SequencePieceClosure.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/SignalPeptideDomains.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/SignalPeptideDomains_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TFBSGene.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TFBSGene_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TaxonSpecies.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TaxonSpecies_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/Taxonomy.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/Taxonomy_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptAttributes.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptAttributes_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptCenDistance.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptCenDistance_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptPathway.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptPathway_ix.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptSequence.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TranscriptSequence_ix.psql (100%) rename 
Model/lib/psql/{webtables/MO => webready/orgSpecific}/TransmembraneDomains.psql (100%) rename Model/lib/psql/{webtables/MO => webready/orgSpecific}/TransmembraneDomains_ix.psql (100%) diff --git a/Model/lib/psql/webtables/UK/AssociatedDataset.psql b/Model/lib/psql/webready/UK/AssociatedDataset.psql similarity index 100% rename from Model/lib/psql/webtables/UK/AssociatedDataset.psql rename to Model/lib/psql/webready/UK/AssociatedDataset.psql diff --git a/Model/lib/psql/webtables/UK/DomainAssignment.psql b/Model/lib/psql/webready/UK/DomainAssignment.psql similarity index 100% rename from Model/lib/psql/webtables/UK/DomainAssignment.psql rename to Model/lib/psql/webready/UK/DomainAssignment.psql diff --git a/Model/lib/psql/webtables/UK/EdaGeneGraph.psql b/Model/lib/psql/webready/UK/EdaGeneGraph.psql similarity index 100% rename from Model/lib/psql/webtables/UK/EdaGeneGraph.psql rename to Model/lib/psql/webready/UK/EdaGeneGraph.psql diff --git a/Model/lib/psql/webtables/UK/PANResults.psql b/Model/lib/psql/webready/UK/PANResults.psql similarity index 100% rename from Model/lib/psql/webtables/UK/PANResults.psql rename to Model/lib/psql/webready/UK/PANResults.psql diff --git a/Model/lib/psql/webtables/UK/ProjectTaxon.psql b/Model/lib/psql/webready/UK/ProjectTaxon.psql similarity index 100% rename from Model/lib/psql/webtables/UK/ProjectTaxon.psql rename to Model/lib/psql/webready/UK/ProjectTaxon.psql diff --git a/Model/lib/psql/webtables/UK/TypeAheadCounts.psql b/Model/lib/psql/webready/UK/TypeAheadCounts.psql similarity index 100% rename from Model/lib/psql/webtables/UK/TypeAheadCounts.psql rename to Model/lib/psql/webready/UK/TypeAheadCounts.psql diff --git a/Model/lib/psql/webtables/UK/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql similarity index 100% rename from Model/lib/psql/webtables/UK/AlphaFoldGenes.psql rename to Model/lib/psql/webready/comparative/AlphaFoldGenes.psql diff --git 
a/Model/lib/psql/webtables/UK/GroupPhylogeneticProfile.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql similarity index 100% rename from Model/lib/psql/webtables/UK/GroupPhylogeneticProfile.psql rename to Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql diff --git a/Model/lib/psql/webtables/UK/OrthologousTranscripts.psql b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql similarity index 100% rename from Model/lib/psql/webtables/UK/OrthologousTranscripts.psql rename to Model/lib/psql/webready/comparative/OrthologousTranscripts.psql diff --git a/Model/lib/psql/webtables/UK/PhyleticPattern.psql b/Model/lib/psql/webready/comparative/PhyleticPattern.psql similarity index 100% rename from Model/lib/psql/webtables/UK/PhyleticPattern.psql rename to Model/lib/psql/webready/comparative/PhyleticPattern.psql diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes.psql b/Model/lib/psql/webready/global/CompoundAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/MG/CompoundAttributes.psql rename to Model/lib/psql/webready/global/CompoundAttributes.psql diff --git a/Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql b/Model/lib/psql/webready/global/CompoundAttributes_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MG/CompoundAttributes_ix.psql rename to Model/lib/psql/webready/global/CompoundAttributes_ix.psql diff --git a/Model/lib/psql/webtables/MG/CompoundId.psql b/Model/lib/psql/webready/global/CompoundId.psql similarity index 100% rename from Model/lib/psql/webtables/MG/CompoundId.psql rename to Model/lib/psql/webready/global/CompoundId.psql diff --git a/Model/lib/psql/webtables/MG/CompoundId_ix.psql b/Model/lib/psql/webready/global/CompoundId_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MG/CompoundId_ix.psql rename to Model/lib/psql/webready/global/CompoundId_ix.psql diff --git a/Model/lib/psql/webtables/MG/CompoundProperties.psql 
b/Model/lib/psql/webready/global/CompoundProperties.psql similarity index 100% rename from Model/lib/psql/webtables/MG/CompoundProperties.psql rename to Model/lib/psql/webready/global/CompoundProperties.psql diff --git a/Model/lib/psql/webtables/MG/CompoundTypeAheads.psql b/Model/lib/psql/webready/global/CompoundTypeAheads.psql similarity index 100% rename from Model/lib/psql/webtables/MG/CompoundTypeAheads.psql rename to Model/lib/psql/webready/global/CompoundTypeAheads.psql diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute.psql b/Model/lib/psql/webready/global/GroupDomainAttribute.psql similarity index 100% rename from Model/lib/psql/webtables/MG/GroupDomainAttribute.psql rename to Model/lib/psql/webready/global/GroupDomainAttribute.psql diff --git a/Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql b/Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MG/GroupDomainAttribute_ix.psql rename to Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql diff --git a/Model/lib/psql/webtables/MG/OntologyLevels.psql b/Model/lib/psql/webready/global/OntologyLevels.psql similarity index 100% rename from Model/lib/psql/webtables/MG/OntologyLevels.psql rename to Model/lib/psql/webready/global/OntologyLevels.psql diff --git a/Model/lib/psql/webtables/MG/OntologyLevels_ix.psql b/Model/lib/psql/webready/global/OntologyLevels_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MG/OntologyLevels_ix.psql rename to Model/lib/psql/webready/global/OntologyLevels_ix.psql diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes.psql b/Model/lib/psql/webready/global/PathwayAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/MG/PathwayAttributes.psql rename to Model/lib/psql/webready/global/PathwayAttributes.psql diff --git a/Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql b/Model/lib/psql/webready/global/PathwayAttributes_ix.psql similarity index 100% 
rename from Model/lib/psql/webtables/MG/PathwayAttributes_ix.psql rename to Model/lib/psql/webready/global/PathwayAttributes_ix.psql diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds.psql b/Model/lib/psql/webready/global/PathwayCompounds.psql similarity index 100% rename from Model/lib/psql/webtables/MG/PathwayCompounds.psql rename to Model/lib/psql/webready/global/PathwayCompounds.psql diff --git a/Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql b/Model/lib/psql/webready/global/PathwayCompounds_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MG/PathwayCompounds_ix.psql rename to Model/lib/psql/webready/global/PathwayCompounds_ix.psql diff --git a/Model/lib/psql/webtables/MG/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql similarity index 100% rename from Model/lib/psql/webtables/MG/PathwayNodes.psql rename to Model/lib/psql/webready/global/PathwayNodes.psql diff --git a/Model/lib/psql/webtables/MG/PathwayReactions.psql b/Model/lib/psql/webready/global/PathwayReactions.psql similarity index 100% rename from Model/lib/psql/webtables/MG/PathwayReactions.psql rename to Model/lib/psql/webready/global/PathwayReactions.psql diff --git a/Model/lib/psql/webtables/MG/PathwayReactions_ix.psql b/Model/lib/psql/webready/global/PathwayReactions_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MG/PathwayReactions_ix.psql rename to Model/lib/psql/webready/global/PathwayReactions_ix.psql diff --git a/Model/lib/psql/webtables/UK/SequenceAttributes.psql b/Model/lib/psql/webready/global/SequenceAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/UK/SequenceAttributes.psql rename to Model/lib/psql/webready/global/SequenceAttributes.psql diff --git a/Model/lib/psql/webtables/K/DatasetExampleSourceId.psql b/Model/lib/psql/webready/keep/DatasetExampleSourceId.psql similarity index 100% rename from Model/lib/psql/webtables/K/DatasetExampleSourceId.psql rename to 
Model/lib/psql/webready/keep/DatasetExampleSourceId.psql diff --git a/Model/lib/psql/webtables/K/DatasetExampleSourceId_ix.psql b/Model/lib/psql/webready/keep/DatasetExampleSourceId_ix.psql similarity index 100% rename from Model/lib/psql/webtables/K/DatasetExampleSourceId_ix.psql rename to Model/lib/psql/webready/keep/DatasetExampleSourceId_ix.psql diff --git a/Model/lib/psql/webtables/K/OrganismAbbreviationBlast.psql b/Model/lib/psql/webready/keep/OrganismAbbreviationBlast.psql similarity index 100% rename from Model/lib/psql/webtables/K/OrganismAbbreviationBlast.psql rename to Model/lib/psql/webready/keep/OrganismAbbreviationBlast.psql diff --git a/Model/lib/psql/webtables/K/OrganismAbbreviationBlast_ix.psql b/Model/lib/psql/webready/keep/OrganismAbbreviationBlast_ix.psql similarity index 100% rename from Model/lib/psql/webtables/K/OrganismAbbreviationBlast_ix.psql rename to Model/lib/psql/webready/keep/OrganismAbbreviationBlast_ix.psql diff --git a/Model/lib/psql/webtables/K/OrganismAttributes.psql b/Model/lib/psql/webready/keep/OrganismAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/K/OrganismAttributes.psql rename to Model/lib/psql/webready/keep/OrganismAttributes.psql diff --git a/Model/lib/psql/webtables/K/OrganismAttributes_ix.psql b/Model/lib/psql/webready/keep/OrganismAttributes_ix.psql similarity index 100% rename from Model/lib/psql/webtables/K/OrganismAttributes_ix.psql rename to Model/lib/psql/webready/keep/OrganismAttributes_ix.psql diff --git a/Model/lib/psql/webtables/K/Profile.psql b/Model/lib/psql/webready/keep/Profile.psql similarity index 100% rename from Model/lib/psql/webtables/K/Profile.psql rename to Model/lib/psql/webready/keep/Profile.psql diff --git a/Model/lib/psql/webtables/K/ProfileSamples.psql b/Model/lib/psql/webready/keep/ProfileSamples.psql similarity index 100% rename from Model/lib/psql/webtables/K/ProfileSamples.psql rename to Model/lib/psql/webready/keep/ProfileSamples.psql diff --git 
a/Model/lib/psql/webtables/K/ProfileSamples_ix.psql b/Model/lib/psql/webready/keep/ProfileSamples_ix.psql similarity index 100% rename from Model/lib/psql/webtables/K/ProfileSamples_ix.psql rename to Model/lib/psql/webready/keep/ProfileSamples_ix.psql diff --git a/Model/lib/psql/webtables/K/ProfileType.psql b/Model/lib/psql/webready/keep/ProfileType.psql similarity index 100% rename from Model/lib/psql/webtables/K/ProfileType.psql rename to Model/lib/psql/webready/keep/ProfileType.psql diff --git a/Model/lib/psql/webtables/K/ProfileType_ix.psql b/Model/lib/psql/webready/keep/ProfileType_ix.psql similarity index 100% rename from Model/lib/psql/webtables/K/ProfileType_ix.psql rename to Model/lib/psql/webready/keep/ProfileType_ix.psql diff --git a/Model/lib/psql/webtables/K/Profile_ix.psql b/Model/lib/psql/webready/keep/Profile_ix.psql similarity index 100% rename from Model/lib/psql/webtables/K/Profile_ix.psql rename to Model/lib/psql/webready/keep/Profile_ix.psql diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript.psql b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ChIPchipTranscript.psql rename to Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql diff --git a/Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ChIPchipTranscript_ix.psql rename to Model/lib/psql/webready/orgSpecific/ChIPchipTranscript_ix.psql diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers.psql b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ChrCopyNumbers.psql rename to Model/lib/psql/webready/orgSpecific/ChrCopyNumbers.psql diff --git a/Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql similarity index 100% rename from 
Model/lib/psql/webtables/MO/ChrCopyNumbers_ix.psql rename to Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql diff --git a/Model/lib/psql/webtables/MO/CodingSequence.psql b/Model/lib/psql/webready/orgSpecific/CodingSequence.psql similarity index 100% rename from Model/lib/psql/webtables/MO/CodingSequence.psql rename to Model/lib/psql/webready/orgSpecific/CodingSequence.psql diff --git a/Model/lib/psql/webtables/MO/CodingSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/CodingSequence_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/CodingSequence_ix.psql rename to Model/lib/psql/webready/orgSpecific/CodingSequence_ix.psql diff --git a/Model/lib/psql/webtables/MO/EqtlSpan.psql b/Model/lib/psql/webready/orgSpecific/EqtlSpan.psql similarity index 100% rename from Model/lib/psql/webtables/MO/EqtlSpan.psql rename to Model/lib/psql/webready/orgSpecific/EqtlSpan.psql diff --git a/Model/lib/psql/webtables/MO/EqtlSpan_ix.psql b/Model/lib/psql/webready/orgSpecific/EqtlSpan_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/EqtlSpan_ix.psql rename to Model/lib/psql/webready/orgSpecific/EqtlSpan_ix.psql diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql similarity index 100% rename from Model/lib/psql/webtables/MO/EstAlignmentGeneSummary.psql rename to Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql diff --git a/Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/EstAlignmentGeneSummary_ix.psql rename to Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary_ix.psql diff --git a/Model/lib/psql/webtables/MO/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/MO/EstAttributes.psql rename to 
Model/lib/psql/webready/orgSpecific/EstAttributes.psql diff --git a/Model/lib/psql/webtables/MO/EstAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/EstAttributes_ix.psql rename to Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql diff --git a/Model/lib/psql/webtables/MO/EstSequence.psql b/Model/lib/psql/webready/orgSpecific/EstSequence.psql similarity index 100% rename from Model/lib/psql/webtables/MO/EstSequence.psql rename to Model/lib/psql/webready/orgSpecific/EstSequence.psql diff --git a/Model/lib/psql/webtables/MO/EstSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/EstSequence_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/EstSequence_ix.psql rename to Model/lib/psql/webready/orgSpecific/EstSequence_ix.psql diff --git a/Model/lib/psql/webtables/MO/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneAttributes.psql rename to Model/lib/psql/webready/orgSpecific/GeneAttributes.psql diff --git a/Model/lib/psql/webtables/MO/GeneAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneAttributes_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers.psql b/Model/lib/psql/webready/orgSpecific/GeneCopyNumbers.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneCopyNumbers.psql rename to Model/lib/psql/webready/orgSpecific/GeneCopyNumbers.psql diff --git a/Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneCopyNumbers_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneCopyNumbers_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneCopyNumbers_ix.psql diff --git 
a/Model/lib/psql/webtables/MO/GeneGoTable.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTable.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneGoTable.psql rename to Model/lib/psql/webready/orgSpecific/GeneGoTable.psql diff --git a/Model/lib/psql/webtables/MO/GeneGoTable_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneGoTable_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTerms.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneGoTerms.psql rename to Model/lib/psql/webready/orgSpecific/GeneGoTerms.psql diff --git a/Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTerms_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneGoTerms_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneGoTerms_ix.psql diff --git a/Model/lib/psql/webtables/MO/GeneId.psql b/Model/lib/psql/webready/orgSpecific/GeneId.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneId.psql rename to Model/lib/psql/webready/orgSpecific/GeneId.psql diff --git a/Model/lib/psql/webtables/MO/GeneId_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneId_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneId_ix.psql diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats.psql b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneIntJuncStats.psql rename to Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql diff --git a/Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql similarity index 100% rename from 
Model/lib/psql/webtables/MO/GeneIntJuncStats_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneIntronJunction.psql rename to Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql diff --git a/Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneIntronJunction_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql diff --git a/Model/lib/psql/webtables/MO/GeneLocations.psql b/Model/lib/psql/webready/orgSpecific/GeneLocations.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneLocations.psql rename to Model/lib/psql/webready/orgSpecific/GeneLocations.psql diff --git a/Model/lib/psql/webtables/MO/GeneLocations_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneLocations_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneLocations_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneLocations_ix.psql diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneMaxIntronGIJ.psql rename to Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql diff --git a/Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneMaxIntronGIJ_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql diff --git a/Model/lib/psql/webtables/MO/GeneModelDump.psql b/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneModelDump.psql 
rename to Model/lib/psql/webready/orgSpecific/GeneModelDump.psql diff --git a/Model/lib/psql/webtables/MO/GeneModelDump_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneModelDump_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql diff --git a/Model/lib/psql/webtables/MO/GeneSummaryFilter.psql b/Model/lib/psql/webready/orgSpecific/GeneSummaryFilter.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneSummaryFilter.psql rename to Model/lib/psql/webready/orgSpecific/GeneSummaryFilter.psql diff --git a/Model/lib/psql/webtables/MO/GeneSummaryFilter_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneSummaryFilter_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GeneSummaryFilter_ix.psql rename to Model/lib/psql/webready/orgSpecific/GeneSummaryFilter_ix.psql diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GenomicSeqAttributes.psql rename to Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql diff --git a/Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GenomicSeqAttributes_ix.psql rename to Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceId.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GenomicSequenceId.psql rename to Model/lib/psql/webready/orgSpecific/GenomicSequenceId.psql diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceId_ix.psql similarity index 100% rename from 
Model/lib/psql/webtables/MO/GenomicSequenceId_ix.psql rename to Model/lib/psql/webready/orgSpecific/GenomicSequenceId_ix.psql diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GenomicSequenceSequence.psql rename to Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence.psql diff --git a/Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GenomicSequenceSequence_ix.psql rename to Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql diff --git a/Model/lib/psql/webtables/MO/GoTermSummary.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GoTermSummary.psql rename to Model/lib/psql/webready/orgSpecific/GoTermSummary.psql diff --git a/Model/lib/psql/webtables/MO/GoTermSummary_ix.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/GoTermSummary_ix.psql rename to Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql diff --git a/Model/lib/psql/webtables/MO/IntronSupportLevel.psql b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql similarity index 100% rename from Model/lib/psql/webtables/MO/IntronSupportLevel.psql rename to Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql diff --git a/Model/lib/psql/webtables/MO/IntronSupportLevel_ix.psql b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/IntronSupportLevel_ix.psql rename to Model/lib/psql/webready/orgSpecific/IntronSupportLevel_ix.psql diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords.psql b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords.psql similarity index 
100% rename from Model/lib/psql/webtables/MO/IntronUtrCoords.psql rename to Model/lib/psql/webready/orgSpecific/IntronUtrCoords.psql diff --git a/Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/IntronUtrCoords_ix.psql rename to Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql similarity index 100% rename from Model/lib/psql/webtables/MO/NameMappingGIJ.psql rename to Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql diff --git a/Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/NameMappingGIJ_ix.psql rename to Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation.psql b/Model/lib/psql/webready/orgSpecific/OrganismAbbreviation.psql similarity index 100% rename from Model/lib/psql/webtables/MO/OrganismAbbreviation.psql rename to Model/lib/psql/webready/orgSpecific/OrganismAbbreviation.psql diff --git a/Model/lib/psql/webtables/MO/OrganismAbbreviation_ix.psql b/Model/lib/psql/webready/orgSpecific/OrganismAbbreviation_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/OrganismAbbreviation_ix.psql rename to Model/lib/psql/webready/orgSpecific/OrganismAbbreviation_ix.psql diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql b/Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank.psql similarity index 100% rename from Model/lib/psql/webtables/MO/OrganismSelectTaxonRank.psql rename to Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank.psql diff --git a/Model/lib/psql/webtables/MO/OrganismSelectTaxonRank_ix.psql 
b/Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/OrganismSelectTaxonRank_ix.psql rename to Model/lib/psql/webready/orgSpecific/OrganismSelectTaxonRank_ix.psql diff --git a/Model/lib/psql/webtables/MO/PANExtDbRls.psql b/Model/lib/psql/webready/orgSpecific/PANExtDbRls.psql similarity index 100% rename from Model/lib/psql/webtables/MO/PANExtDbRls.psql rename to Model/lib/psql/webready/orgSpecific/PANExtDbRls.psql diff --git a/Model/lib/psql/webtables/MO/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql similarity index 100% rename from Model/lib/psql/webtables/MO/PANIO.psql rename to Model/lib/psql/webready/orgSpecific/PANIO.psql diff --git a/Model/lib/psql/webtables/MO/PANIO_ix.psql b/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/PANIO_ix.psql rename to Model/lib/psql/webready/orgSpecific/PANIO_ix.psql diff --git a/Model/lib/psql/webtables/MO/PathwayNodeGene.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql similarity index 100% rename from Model/lib/psql/webtables/MO/PathwayNodeGene.psql rename to Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql diff --git a/Model/lib/psql/webtables/MO/PathwayNodeGene_ix.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/PathwayNodeGene_ix.psql rename to Model/lib/psql/webready/orgSpecific/PathwayNodeGene_ix.psql diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql similarity index 100% rename from Model/lib/psql/webtables/MO/PathwaysGeneTable.psql rename to Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql diff --git a/Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql similarity index 100% rename from 
Model/lib/psql/webtables/MO/PathwaysGeneTable_ix.psql rename to Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity.psql b/Model/lib/psql/webready/orgSpecific/PdbSimilarity.psql similarity index 100% rename from Model/lib/psql/webtables/MO/PdbSimilarity.psql rename to Model/lib/psql/webready/orgSpecific/PdbSimilarity.psql diff --git a/Model/lib/psql/webtables/MO/PdbSimilarity_ix.psql b/Model/lib/psql/webready/orgSpecific/PdbSimilarity_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/PdbSimilarity_ix.psql rename to Model/lib/psql/webready/orgSpecific/PdbSimilarity_ix.psql diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ProteinAttributes.psql rename to Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql diff --git a/Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ProteinAttributes_ix.psql rename to Model/lib/psql/webready/orgSpecific/ProteinAttributes_ix.psql diff --git a/Model/lib/psql/webtables/MO/ProteinSequence.psql b/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ProteinSequence.psql rename to Model/lib/psql/webready/orgSpecific/ProteinSequence.psql diff --git a/Model/lib/psql/webtables/MO/ProteinSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/ProteinSequence_ix.psql rename to Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql diff --git a/Model/lib/psql/webtables/MO/RnaSeqStats.psql b/Model/lib/psql/webready/orgSpecific/RnaSeqStats.psql similarity index 100% rename from Model/lib/psql/webtables/MO/RnaSeqStats.psql rename to 
Model/lib/psql/webready/orgSpecific/RnaSeqStats.psql diff --git a/Model/lib/psql/webtables/MO/RnaSeqStats_ix.psql b/Model/lib/psql/webready/orgSpecific/RnaSeqStats_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/RnaSeqStats_ix.psql rename to Model/lib/psql/webready/orgSpecific/RnaSeqStats_ix.psql diff --git a/Model/lib/psql/webtables/MO/SequenceEnzymeClass.psql b/Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass.psql similarity index 100% rename from Model/lib/psql/webtables/MO/SequenceEnzymeClass.psql rename to Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass.psql diff --git a/Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql b/Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/SequenceEnzymeClass_ix.psql rename to Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass_ix.psql diff --git a/Model/lib/psql/webtables/MO/SequencePieceClosure.psql b/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql similarity index 100% rename from Model/lib/psql/webtables/MO/SequencePieceClosure.psql rename to Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains.psql b/Model/lib/psql/webready/orgSpecific/SignalPeptideDomains.psql similarity index 100% rename from Model/lib/psql/webtables/MO/SignalPeptideDomains.psql rename to Model/lib/psql/webready/orgSpecific/SignalPeptideDomains.psql diff --git a/Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql b/Model/lib/psql/webready/orgSpecific/SignalPeptideDomains_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/SignalPeptideDomains_ix.psql rename to Model/lib/psql/webready/orgSpecific/SignalPeptideDomains_ix.psql diff --git a/Model/lib/psql/webtables/MO/TFBSGene.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TFBSGene.psql rename to 
Model/lib/psql/webready/orgSpecific/TFBSGene.psql diff --git a/Model/lib/psql/webtables/MO/TFBSGene_ix.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TFBSGene_ix.psql rename to Model/lib/psql/webready/orgSpecific/TFBSGene_ix.psql diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies.psql b/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TaxonSpecies.psql rename to Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql diff --git a/Model/lib/psql/webtables/MO/TaxonSpecies_ix.psql b/Model/lib/psql/webready/orgSpecific/TaxonSpecies_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TaxonSpecies_ix.psql rename to Model/lib/psql/webready/orgSpecific/TaxonSpecies_ix.psql diff --git a/Model/lib/psql/webtables/MO/Taxonomy.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql similarity index 100% rename from Model/lib/psql/webtables/MO/Taxonomy.psql rename to Model/lib/psql/webready/orgSpecific/Taxonomy.psql diff --git a/Model/lib/psql/webtables/MO/Taxonomy_ix.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/Taxonomy_ix.psql rename to Model/lib/psql/webready/orgSpecific/Taxonomy_ix.psql diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TranscriptAttributes.psql rename to Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql diff --git a/Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TranscriptAttributes_ix.psql rename to Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance.psql 
b/Model/lib/psql/webready/orgSpecific/TranscriptCenDistance.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TranscriptCenDistance.psql rename to Model/lib/psql/webready/orgSpecific/TranscriptCenDistance.psql diff --git a/Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptCenDistance_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TranscriptCenDistance_ix.psql rename to Model/lib/psql/webready/orgSpecific/TranscriptCenDistance_ix.psql diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TranscriptPathway.psql rename to Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql diff --git a/Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TranscriptPathway_ix.psql rename to Model/lib/psql/webready/orgSpecific/TranscriptPathway_ix.psql diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence.psql b/Model/lib/psql/webready/orgSpecific/TranscriptSequence.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TranscriptSequence.psql rename to Model/lib/psql/webready/orgSpecific/TranscriptSequence.psql diff --git a/Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptSequence_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TranscriptSequence_ix.psql rename to Model/lib/psql/webready/orgSpecific/TranscriptSequence_ix.psql diff --git a/Model/lib/psql/webtables/MO/TransmembraneDomains.psql b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TransmembraneDomains.psql rename to Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql diff --git 
a/Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains_ix.psql similarity index 100% rename from Model/lib/psql/webtables/MO/TransmembraneDomains_ix.psql rename to Model/lib/psql/webready/orgSpecific/TransmembraneDomains_ix.psql From 1b021a547242d6c7091fc2713917250c75b8a8c2 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Mon, 19 May 2025 18:32:42 -0400 Subject: [PATCH 026/112] rename webready folders --- Model/lib/psql/webready/{UK => unknown}/AssociatedDataset.psql | 0 Model/lib/psql/webready/{UK => unknown}/DomainAssignment.psql | 0 Model/lib/psql/webready/{UK => unknown}/EdaGeneGraph.psql | 0 Model/lib/psql/webready/{UK => unknown}/PANResults.psql | 0 Model/lib/psql/webready/{UK => unknown}/ProjectTaxon.psql | 0 Model/lib/psql/webready/{UK => unknown}/TypeAheadCounts.psql | 0 6 files changed, 0 insertions(+), 0 deletions(-) rename Model/lib/psql/webready/{UK => unknown}/AssociatedDataset.psql (100%) rename Model/lib/psql/webready/{UK => unknown}/DomainAssignment.psql (100%) rename Model/lib/psql/webready/{UK => unknown}/EdaGeneGraph.psql (100%) rename Model/lib/psql/webready/{UK => unknown}/PANResults.psql (100%) rename Model/lib/psql/webready/{UK => unknown}/ProjectTaxon.psql (100%) rename Model/lib/psql/webready/{UK => unknown}/TypeAheadCounts.psql (100%) diff --git a/Model/lib/psql/webready/UK/AssociatedDataset.psql b/Model/lib/psql/webready/unknown/AssociatedDataset.psql similarity index 100% rename from Model/lib/psql/webready/UK/AssociatedDataset.psql rename to Model/lib/psql/webready/unknown/AssociatedDataset.psql diff --git a/Model/lib/psql/webready/UK/DomainAssignment.psql b/Model/lib/psql/webready/unknown/DomainAssignment.psql similarity index 100% rename from Model/lib/psql/webready/UK/DomainAssignment.psql rename to Model/lib/psql/webready/unknown/DomainAssignment.psql diff --git a/Model/lib/psql/webready/UK/EdaGeneGraph.psql b/Model/lib/psql/webready/unknown/EdaGeneGraph.psql 
similarity index 100% rename from Model/lib/psql/webready/UK/EdaGeneGraph.psql rename to Model/lib/psql/webready/unknown/EdaGeneGraph.psql diff --git a/Model/lib/psql/webready/UK/PANResults.psql b/Model/lib/psql/webready/unknown/PANResults.psql similarity index 100% rename from Model/lib/psql/webready/UK/PANResults.psql rename to Model/lib/psql/webready/unknown/PANResults.psql diff --git a/Model/lib/psql/webready/UK/ProjectTaxon.psql b/Model/lib/psql/webready/unknown/ProjectTaxon.psql similarity index 100% rename from Model/lib/psql/webready/UK/ProjectTaxon.psql rename to Model/lib/psql/webready/unknown/ProjectTaxon.psql diff --git a/Model/lib/psql/webready/UK/TypeAheadCounts.psql b/Model/lib/psql/webready/unknown/TypeAheadCounts.psql similarity index 100% rename from Model/lib/psql/webready/UK/TypeAheadCounts.psql rename to Model/lib/psql/webready/unknown/TypeAheadCounts.psql From d5798edb5a5f91f3a145ff388f0b9f309c0e21bc Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 20 May 2025 14:46:46 -0400 Subject: [PATCH 027/112] prune tuning manager file --- .../xml/tuningManager/apiTuningManager.xml | 10136 +++------------- Model/lib/xml/tuningManager/tablePruning.txt | 4 +- 2 files changed, 1783 insertions(+), 8357 deletions(-) diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml index b7a4f3217b..6a783fcb6d 100644 --- a/Model/lib/xml/tuningManager/apiTuningManager.xml +++ b/Model/lib/xml/tuningManager/apiTuningManager.xml @@ -3,742 +3,6 @@ - - Locations and Sequence of Transmembrane Domains (TMHMM) - - - - - - - - - - - - - - - - - - - - - - - Locations and Sequence of Signal Peptide Domains (SignalP) - - - - - - - - - - = .5 - OR spf.signal_probability IS NULL - OR ((spf.means_score + spf.maxy_score) / 2) >= .5 - OR ( spf.maxy_conclusion + spf.maxc_conclusion + spf.maxs_conclusion + spf.means_conclusion ) >= 3 - ) - ORDER BY - spf.aa_sequence_id, spf.aa_feature_id - ]]> - - - - - - - - - - - Taxon 
ranks for organisms - - - - - - - - - - - - Attributes for Metabolic Pathways - - - - - - - - - - - - - - - - - synteny stats for each reference-taxon / comparison-taxon pair - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - the max and min depth of each ontology term in OntologyRelationship. Used by the GoTermSummary tuning table - - - - - - - - - - - - - - - - - - - - - GeneGoTerms: each row represents one GO term assignment to one gene, right from what was loaded. - - - - - - - - - - - - - - - - - - - - - - A tuning table for the gene record GO term table - - - - - - - - - - - - Map each GO term that is assigned to at least one gene to a GoSubset term @@ -817,88 +81,6 @@ - - GoTermSummary: each row represents one GO term assignment to one gene. - (Typically, a gene has multiple such assignments.) This is used for - finding gene-GO mappings, such as for the gene-page GO table. - - - - - - - - - - - - - - - - - - - - Each row maps a dataset onto an ID for which the dataset contains data; each dataset gets one such row. @@ -930,38 +112,6 @@ - - Stores per-organism information. Used by the organism record, as well as by project_id(), the function that maps an organism to a project. @@ -1320,7361 +470,2024 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id) - - - Each record maps a gene to a PDB structure. Used by the model to find - genes that have a PDB structure and to find the PDB structures for a - given gene. - - - - - - - - - - - - - GeneId maps any valid ID for a gene onto its official ID. These two quantities - are stored in the "id" and "gene" columns, respectively. The "unique_mapping" - column is set to 1 for IDs which map to only one gene. - - Most of the CREATE TABLE statement is made up of the union of nine subqueries, - each of which looks in a different place for gene IDs. 
Each subquery populates - the "union_member" field with a different literal string, to make it easier to - understand which part (or parts) of the SQL is responsible for each ID-to-gene - mapping. + + + + Stores, for each transcript, a string containing the gene-relative coordinates + of all its introns and UTRs. - - - - - - - - - - - - - - - - - = pred_loc.start_min - AND pred_loc.is_reversed = gene_loc.is_reversed - AND pred_loc.external_database_release_id = edr.external_database_release_id - AND edr.external_database_id = ed.external_database_id - UNION - SELECT ng.name AS id, gf.source_id AS gene, - 'NaGene' as union_member, ed.name as database_name /* dots.NaGene.name */ - FROM dots.GeneFeature gf, dots.NaFeatureNaGene nfng, dots.NaGene ng, - sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed - WHERE gf.na_feature_id = nfng.na_feature_id - AND ng.na_gene_id = nfng.na_gene_id - AND gf.external_database_release_id = edr.external_database_release_id - AND edr.external_database_id = ed.external_database_id - UNION - SELECT source_id AS id, source_id AS gene, - 'same ID' as union_member, ed.name as database_name /* same ID (reflexive mapping) */ - FROM dots.GeneFeature gf, - sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed - WHERE gf.external_database_release_id = edr.external_database_release_id - AND edr.external_database_id = ed.external_database_id - UNION - SELECT n.name AS id, gf.source_id AS gene, - 'gene name' as union_member, d.name as database_name -- apidb.GeneFeatureName.name - from dots.genefeature gf, sres.ExternalDatabaseRelease r, sres.ExternalDatabase d, - ( select na_feature_id, name - from apidb.GeneFeatureName - where is_preferred = 1 - EXCEPT - -- suppress gene/name associations from the *DELETED_RSRC databases - select gfn.na_feature_id, gfn.name - from apidb.GeneFeatureName gfn, - sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr - where gfn.external_database_release_id = edr.external_database_release_id - and 
ed.external_database_id = edr.external_database_id - and ed.name like '%DELETED_RSRC' - ) n - where n.na_feature_id = gf.na_feature_id - and gf.external_database_release_id = r.external_database_release_id - and r.external_database_id = d.external_database_id - UNION - select dr.primary_identifier as id, - gf.source_id as gene, - 'AA feature DbRef primary ID' as union_member, - ed.name as database_name /* DbRef.primary_identifier mapped through DbRefAaFeature */ - from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf, - dots.DbRefAaFeature draf, sres.DbRef dr, - sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed - where gf.na_feature_id = t.parent_id - and t.na_feature_id = taf.na_feature_id - and taf.aa_feature_id = draf.aa_feature_id - and draf.db_ref_id = dr.db_ref_id - and dr.external_database_release_id = edr.external_database_release_id - and edr.external_database_id = ed.external_database_id - and ed.name - not in ('INTERPRO', 'PFAM', 'PIRSF', 'PRODOM', 'PROSITEPROFILES', - 'SMART', 'SUPERFAMILY', 'TIGRFAM', 'CDD','HAMAP','HMMPANTHER', - 'PRINTS','SCANPROSITE','SFLD') - ) mapping, - dots.GeneFeature gf, dots.NaSequence ns - WHERE mapping.gene = gf.source_id - AND gf.na_sequence_id = ns.na_sequence_id - AND (ns.taxon_id::varchar = '&filterValue' or length('&filterValue') = 0) - AND (gf.is_predicted != 1 OR gf.is_predicted is null) - GROUP BY mapping.id, mapping.gene - ]]> - - - - + - - - ]]> - - - - + + + + + + Stores special webservice abbreviations which are not standard organism + names. Each record maps an organism name onto this abbreviation, as + well as the species name and project ID. Used by the model and as an + input in the creation of the OrganismAbbreviationBlast tuning table. + Propagated to portal instances. + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - This table maps IDs for a sequence onto the official ID of the sequence. 
- It is analogous to GeneId, which does the same thing for genes. Used by - genomic-sequence record queries, by the sequence retrieval tool, and by - the BasketFixer, which updates users' baskets at release time to replace - old IDs with updated ones. + + + + Group species by higher level taxonomy. Each row associates a taxon of + interest with one of its ancestors in the taxon tree. Used in parameter + queries that have to know about the taxon tree. Propagated to portal + instances. - - - - - - + + + + - - - - - Used by GeneTables.Epitopes to map a gene to its epitopes. - - - - - - - - - - Stores (transcript, sequence, distance from centromere) 3-tuples for transcripts - that lie on a sequence for which we have a centomere location. + + Each record maps an organism to its BLAST abbreviation. Used by + BLAST-query parameters. Propagated to portal instances. - - - - - - + + + + - - The BFMV for proteins. Each protein gets a single record, which - stores all its attributes. Used mainly to create TranscriptAttributes + + For each project, show which BLAST databases are available for which + species. Used in BLAST param queries. Propagated to portal instances. - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + Each row stores mass-spec. based expression evidence for one sample of + one experiment for one gene. Used for mass spec queries in the model, + GBrowse, and PBrowse, and also in the creation of the MSTranscriptSummary + tuning table. + + + + + + + - - - - - - + + + + + + + + + + - - The BFMV for the gene record. Each gene gets a single record, which - stores all its attributes. Used widely, in the model and elsewhere, for - queries involving genes, as well as in the creation of more than a - dozen other tuning tables. 
+ + Stores summary information from annotated genomes to facilitate overview section of gene page - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - 10 ) - WHERE ta.project_id = 'TriTrypDB' - ]]> - - - - - - - - - - + + Mass-spec experiment results for a peptide. Used by the model, GBrowse, + and PBrowse. + + + + + + + - - - ]]> - - - - + + + Data from the Seattle Structural Genomics Center for Infectious Disease, + populated from their web service. Used in the gene record. + + - + + Used by the model and GBrowse, as well as an input in the creation of + the tuning tables like MSModifiedPeptideSummary and MSPeptideSummary. - - - - - - + - UNION - SELECT gene_source_id, source_id, taxon_id, organism, ontology_term_name - , string_value, number_value - FROM ( - select atr.gene_source_id, atr.source_id, atr.taxon_id, atr.organism, - 'long_transcript_novelty' as ontology_term_name, ltr.transcript_novelty string_value, - null as number_value, sum(counts.reads) as total_reads, ltr.transcript_length - from TranscriptAttributes atr - , apidb.longreadtranscript ltr - , JSON_TABLE(count_data, '$.*' COLUMNS (reads INTEGER PATH '$')) counts - where ltr.gene_source_id = atr.gene_source_id - AND ltr.transcript_length >= 20 - GROUP BY atr.gene_source_id, atr.source_id, atr.taxon_id, atr.organism, ltr.transcript_novelty, ltr.transcript_length - ) - WHERE total_reads >= 5 - UNION - select atr.gene_source_id, atr.source_id, atr.taxon_id, atr.organism, - 'intron_junction' as ontology_term_name, it.string_value string_value, - null as number_value - from - IntronSupportLevel it - ,TranscriptAttributes atr - where it.gene_source_id = atr.gene_source_id + + + Used by the model when writing profile data + + + - UNION - select atr.gene_source_id, atr.source_id, atr.taxon_id, atr.organism, - 'unique_reads' as ontology_term_name, null as string_value , gj.total_unique number_value - from - 
GeneIntronJunction gj - ,TranscriptAttributes atr - where gj.gene_source_id = atr.gene_source_id - ) - ]]> - + + + Associates an organism with the GBrowse and PBrowse tracks available + for it. Used by the gene record. + + + + ---> - - - Stores, for each transcript, a string containing the gene-relative coordinates - of all its introns and UTRs. + + + Each row maps a dataset onto an ID for which the dataset contains data; + each dataset gets one such row. + Used in dataset record queries. - - + + + + + + + + + - - - - - - - - A single product string per gene - - - - - - - - + + Citation info for proteomics datasets, used by GBrowse + + ' || sample || '

' as sample_i + FROM MSPeptideSummary mps, DatasetPresenter ds + -- consider using the tuning table ExternalDbDatasetPresenter instead of the LIKE below, if its performance is a problem + WHERE (ds.name = mps.external_database_name or mps.external_database_name like ds.dataset_name_pattern) + ) t + group by name, id + ) + SELECT name, + substr(description, 4000, 1) || ' Primary Contact Email: '|| coalesce(email, 'unavailable') + || ' PMID: ' || publications || '

Samples:

' + || sample_table || chr(10) || + ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation + FROM ( + SELECT ds.name as name, ds.summary as description, pubs.contact_email as email, + pubs.pmids as publications, samples.sample_table as sample_table + FROM DatasetPresenter ds, pubs, samples + WHERE ds.dataset_presenter_id = pubs.id + AND ds.dataset_presenter_id = samples.id + ) t ]]>
- - - ]]> - + + + + + + + + + + + - - The BFMV for the gene record. Each gene gets a single record, which - stores all its attributes. - + + + + - - - - - - - + + + - - - ]]> - - - - - - - + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - Each record represents one SNP. Widely used in the model, as well as in - the creation of several other tuning tables, Includes only NGS SNPs. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Each row represents one EST. Used widely in the model, and to make the - tuning tables BlastTypes, OrganismAbbreviationBlast, and OrganismAttributes. - - - - - - - - - - - - - - - - - - - - - - Each row represents a colocated EST alignment - gene pair. Used by the - model, by generateGeneMetrics, and in the creation of the - OrganismAttributes tuning table - - - - - - - - - - - - - = 0 - AND query_sequence.na_sequence_id = ba.query_na_sequence_id - ]]> - - - - - - - - - - - - - - - - - - Each row represents one cosmid or bac end feature; for use in JBrowse. - - - - - - - - - - - - - - - Each record captures info for a strain/protocol app node. Used in the model, including - gene and SNP queries, as well as the gene record. - - - - - - - - - - - - - - - - Each record captures info for a strain/protocol app node. Used in the model, including - gene and SNP queries, as well as the gene record. - - - - - - - - - - - - - - - - - - - - The BFMV for the WDK popset record. Widely used in the model for - queries related to popsets. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Used by the GenesByChipChip(Plasmo|Toxo) query, as well as by - generateGeneMetrics. Also an input to OrganismAttributes. 
- - - - - - - 0 */ - CASE WHEN ta.is_reversed = 0 - THEN ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ELSE ta.end_max - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - END > 0 - THEN - CASE - WHEN ta.is_reversed = 0 - THEN '-' - ELSE '+' - END - ELSE - CASE - WHEN ta.is_reversed = 1 - THEN '-' - ELSE '+' - END - END as direction, - sr.score1 as score - FROM TranscriptAttributes ta, - Results.segmentresult sr, - Study.StudyLink sl, - Study.Study s - WHERE sr.na_sequence_id = ta.na_sequence_id - AND s.study_id = sl.study_id - AND sl.protocol_app_node_id = sr.protocol_app_node_id - AND lower(s.name) like '%chip%peaks' - AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.start_min) <= 3000) - or (ta.is_reversed = 1 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.end_max) <= 3000) ) - ]]> - - - - - - - - - - - Used by gene queries, as well as by generateGeneMetrics. Also an input - to OrganismAttributes. - - - - - - 0 */ - CASE WHEN ga.is_reversed = 0 - THEN ga.start_min - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ELSE ga.end_max - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - END > 0 - THEN - CASE - WHEN ga.is_reversed = 0 - THEN '-' - ELSE '+' - END - ELSE - CASE - WHEN ga.is_reversed = 1 - THEN '-' - ELSE '+' - END - END as direction, - aef.* - FROM dots.BindingSiteFeature aef, - apidb.FeatureLocation arrloc, - GeneAttributes ga - WHERE aef.na_feature_id = arrloc.na_feature_id - AND arrloc.na_sequence_id = ga.na_sequence_id - AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000) - or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) ) - ]]> - - - - - - - - - - - - Each record maps a gene onto a subcellular location. Used by - GenesBySubcellularLocalization. 
- - - - - - - - - - - - - - - - Like dots.SimilaritySpan, except that for sequences that are mapped by - SequencePiece into parts of other sequences, both locations are stored. - Used by GBrowse, and also in the creation of the Blastx tuning table. - - - - - - - - = sim.max_query_end - AND sim.query_id = contig.na_sequence_id - AND sp.virtual_na_sequence_id = scaffold.na_sequence_id - ]]> - - - - - - - - - - - - - SNP Chip only, such as Plasmo barcode, 3k_chp and hd_array - - Each record represents one SNP. Widely used in the model, as well as in - the creation of several other tuning tables - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - this otherwise-unneeded tuning table, which depends on SnpAttributesDoTS, - exists so that the view SnpChipAttributes can be created as a side-effect - - - - - - - - - - - - - - - - Each record stores a Blastp similarity of a gene. Used by the gene-page - Blastp table. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Each record stores a colocated (gene, popset) 2-tuple. Used by the - gene page as well as the PopsetByOverlap query. - - - - - - sim.min_subject_start - AND sim.query_id = ia.na_sequence_id - GROUP BY ia.source_id, fl.feature_source_id - ]]> - - - - - - - - - - - - Each record maps a taxon_id of interest onto the taxon_id of that taxon's - taxon-tree ancestor whose rank is "species". Used by - gene queries, and as an input in the - creation of several tuning tables, including GeneAttributes - - - - - - - - - - - - - - - - - Each record stores a Blastx similarity. Used by GBrowse for the - match:WU_BLASTX track. - - - - - - - - - - - - - - - Each record stores the transcript sequence of one gene. Used by the - gene record and the sequence retrieval tool. Propagated to the portal. - - - - - - - - - - - - - - Each record stores the coding sequence of one gene. Used by the - gene record and the sequence retrieval tool. Propagated to the portal. 
- - - - - - - - - - - - - - - Each record stores the coding sequence of one gene. Used by the - gene record and the sequence retrieval tool, as well as by - buildTrackOldAnnotationTT. Propagated to the portal. - - - - - - - - - - - - - - - Each record stores the nucleotide sequence for one genomic sequence - that is "official" (in the sense that it can be instantiated as a WDK - sequence record. Used by generatePathoLogicFile and the sequence - retrieval tool Propagated to portal instances. - - - - - - - - - - - - - - - - Each record stores the nucleotide sequence of an EST, for use by the - relevant attribute query in the WDK EST record. Propagated to portal - instances. - - - - - - - - - - - - - - - - Mapping table of experiment and sample names to junction protocol_app_node_id - - - - - - - - - - - - = 1 - GROUP BY protocol_app_node_id - ), part AS ( - SELECT - ij.junctions_pan_id, ij.avg_value, stats.multiplier - , max(ij.expression_pan_id) OVER w as max_exp_pan_id - , max(ij.sample_name) OVER w as max_sample_Name - , max(ij.exp_name) OVER w as max_exp_name - FROM ij, stats - WHERE ij.junctions_pan_id = stats.protocol_app_node_id - WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) - ) - SELECT DISTINCT * FROM ( - SELECT junctions_pan_id - , first_value(max_exp_pan_id) OVER w1 as exp_pan_id - , first_value(max_sample_name) OVER w1 as sample_name - , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands - , multiplier - FROM part - WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) - ) t - ORDER BY junctions_pan_id - ]]> - - - - - - - - - - - - - - - - - - - - - - - Table collects up single row / intronjunction (identified as all junctions with same start, end and strand). Statistics are generated including percentages of max intron score and ratios vs expression on an overall level. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 500000 THEN 500000 ELSE step_mult END as seq_step_mult - FROM ( - SELECT gs.na_sequence_id, gs.length, gs.taxon_id, 25000 * (1 + floor(gs.length/count(*))) as step_mult - FROM apidb.intronjunction ij, dots.nasequence gs - WHERE gs.na_sequence_id = ij.na_sequence_id - GROUP BY gs.na_sequence_id, gs.length, gs.taxon_id - ) t - ORDER BY taxon_id - ) - LOOP - iter_length := idlist.seq_step_mult; - i_first_pos := 1; - i_last_pos := i_first_pos + iter_length; - WHILE i_first_pos < idlist.length - LOOP - INSERT INTO GIJtmp - SELECT DISTINCT - junc.*, - CASE - WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed - THEN 1 - ELSE 0 - END as matches_gene_strand, - last_value(ga.source_id) over w1 as gene_source_id, - last_value(ga.na_feature_id) over w1 as gene_na_feature_id, - CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron - FROM ( - SELECT ij.na_sequence_id,seq.source_id as sequence_source_id,ij.segment_start,ij.segment_end, - sum(ij.unique_reads) as total_unique, round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm, - ij.is_reversed,seq.source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id - FROM apidb.intronjunction ij, namemappinggij je, dots.nasequence seq - WHERE ij.na_sequence_id = idlist.na_sequence_id - AND ij.segment_start between i_first_pos and i_last_pos - AND ij.na_sequence_id = seq.na_sequence_id - AND ij.unique_reads >= 1 - AND je.junctions_pan_id = ij.protocol_app_node_id - AND je.multiplier < 20 - GROUP BY ij.na_sequence_id,ij.segment_start,ij.segment_end, ij.is_reversed,seq.source_id - ) junc - LEFT JOIN GeneIdLocGIJ&1 ga ON - junc.na_sequence_id = ga.na_sequence_id - AND junc.segment_start >= ga.start_min - AND junc.segment_end <= ga.end_max - AND junc.is_reversed = ga.is_reversed - LEFT JOIN annotgij ag ON - junc.na_sequence_id = ag.na_sequence_id - AND 
junc.segment_start = ag.start_min - AND junc.segment_end = ag.end_max - AND junc.is_reversed = ag.is_reversed - WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron') - WINDOW w1 AS ( - PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type - ORDER BY ga.total_expression ASC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) - ; - commit; - i_first_pos := i_last_pos + 1; - i_last_pos := i_first_pos + iter_length; - END LOOP; - END LOOP; - END; - $$ LANGUAGE PLPGSQL; - ]]> - - - - - - 0 THEN round((junc.total_isrpm / maxv.max_isrpm) * 100,2) ELSE null END as percent_max, - CASE WHEN maxv.gene_source_id is not null THEN 1 ELSE 0 END as contained, - CAST (null as numeric(10)) as taxon_id, - cast (null as numeric(10)) as upstream_gene_id, - cast (null as numeric) as upstream_distance, - cast (null as numeric(10)) as downstream_gene_id, - cast (null as numeric) as downstream_distance - FROM - gijtmp junc LEFT JOIN - ( - SELECT gene_source_id,max(total_unique) as max_unique, max(total_isrpm) as max_isrpm - FROM gijtmp - WHERE gene_source_id is not null - GROUP BY gene_source_id - ) maxv ON junc.gene_source_id = maxv.gene_source_id - ]]> - - - - - - - - - - - - - - - - - Stores statistics for annotated introns used for configuring JBrowse tracks based on organism. - - - - - - - - - - - - Stores maximum values per gene for each sample so percent max intron can be computed for sample table. - - - - - - - - - = j.segment_end - AND ga.is_reversed = j.is_reversed - AND j.protocol_app_node_id = mult.junctions_pan_id - GROUP BY j.protocol_app_node_id, ga.source_id - ); - commit; - END LOOP; - END; - $$ LANGUAGE PLPGSQL; - ]]> - - - - - - - - Each record stores the nucleotide sequence of one popset. Used in the - relevant attribute query of the WDK popset record, as well as by - PopsetClustalOmega. Propagated to portal instances. 
- - - - - - - - - - - - - - Each record maps an organism name onto an abbreviation, getting the - pair either from apidb.Organism or (in the case of Tvag), hardwired - into the below SQL. This table will eventually be replaced by workflow. - - - - - - - - - - - Stores special webservice abbreviations which are not standard organism - names. Each record maps an organism name onto this abbreviation, as - well as the species name and project ID. Used by the model and as an - input in the creation of the OrganismAbbreviationBlast tuning table. - Propagated to portal instances. - - - - - - - - - - - Groups projects by higher level taxonomy. Used in the creation of the - OldOrganismTree tuning table. - - - - - - - - - - Group species by higher level taxonomy. Each row associates a taxon of - interest with one of its ancestors in the taxon tree. Used in parameter - queries that have to know about the taxon tree, as well as - apidb.project_id(), the function that maps an organism name to a - project. Propagated to portal instances. - - - - - - - - - - - - - - - - - - - Group species by higher level taxonomy. Each row associates a taxon of - interest with one of its ancestors in the taxon tree. Used in parameter - queries that have to know about the taxon tree. Propagated to portal - instances. - - - - - - - - - - - - - - - - - - - - - - - - - - - - Each record maps an organism to its BLAST abbreviation. Used by - BLAST-query parameters. Propagated to portal instances. - - - - - - - - - - - - - For each project, show which BLAST databases are available for which - species. Used in BLAST param queries. Propagated to portal instances. - - - - - - - - - - - - - - - - Properties table for ChEBI Compounds. - - - - - - - - - - - - The BFMV for the compound WDK record type. Used by the model for the - compound record and queries, as well as in the creation of the - PathwayCompounds tuning table. Propagated to portal instances. 
- Note: children of ChEBI compounds are excluded, but data of these is gathered in the (parent) entries. - - - - - - - - - - - - - Alias table for Compounds. - Compound column is the source_id from CompoundAttributes (chEBI_ID). - ID can have this same ID, or mapping KEGG ID, or Name, or Synonym. - - - - - - - - - - - - - - - - Each record represents a 5-tuple of (reaction, compound, pathway, - enzyme, type). Used extensively in the model for pathway-related - queries, as well as by getImageMap.pl. - - - - - - - - - - - - - - - - - - - Aggregates reactions irrespective of pathway. Required to determine if BioCyc reactions are reversible. Used extensively in the model in conjunction with pathwaycompounds for pathway related queries - - - - - - - - - - ' || o.enzyme || '' ELSE o.enzyme END as expasy_html - FROM ( - SELECT i.* - , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-' - THEN - 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1=' - || ec.ec_number_1 - || CASE ec.ec_number_2 WHEN null THEN null ELSE chr(38) || 'field2=' || ec.ec_number_2 END - || CASE ec.ec_number_3 WHEN null THEN null ELSE chr(38) || 'field3=' || ec.ec_number_3 END - || CASE ec.ec_number_4 WHEN null THEN null ELSE chr(38) || 'field4=' || ec.ec_number_4 END - ELSE reaction_url END as expasy_url - , ec.description as enzyme_description - FROM ( - SELECT - reaction_id - , reaction_source_id - , reaction_url - , ext_db_name - , ext_db_version - , enzyme - , substrates_html || ' ' || sign || ' ' || products_html as equation_html - , substrates_text || ' ' || sign || ' ' || products_text as equation_text - , case when sign = '<=>' then 1 else 0 end as is_reversible - , substrates_text - , products_text - FROM ( - SELECT - reaction_id - , reaction_source_id - , reaction_url - , ext_db_name - , ext_db_version - , enzyme - , (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like 
'%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign - , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html - , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text - , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html - , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text - FROM ( - WITH rep AS ( - SELECT DISTINCT - pr.PATHWAY_REACTION_ID as reaction_id - , pr.SOURCE_ID as reaction_source_id - , pn.DISPLAY_LABEL as enzyme - , coalesce(ca.compound_name, pc.compound_source_id) as compound - , prel.is_reversible as is_reversible_og - , last_value(prel.is_reversible) OVER (partition by pr.pathway_reaction_id ORDER BY prel.is_reversible ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) as is_reversible - , first_value(pc.type) over (partition by pr.pathway_reaction_id, pr.SOURCE_ID, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE, coalesce(pc.chebi_accession, pc.compound_source_id), coalesce(ca.compound_name, pc.compound_source_id) ORDER BY pc.pathway_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as type - FROM - sres.pathway p - , apidb.pathwayreaction pr - , APIDB.PATHWAYREACTIONREL prr - , SRES.PATHWAYNODE pn - , SRES.PATHWAYRELATIONSHIP prel - , SRES.ONTOLOGYTERM ot - , PathwayCompounds pc - LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id - WHERE p.PATHWAY_ID = prr.PATHWAY_ID - AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID - AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID - AND prel.NODE_ID = pn.PATHWAY_NODE_ID - AND ot.name = 'enzyme' - AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID - AND pc.PATHWAY_ID = p.PATHWAY_ID - AND pc.REACTION_id = pr.PATHWAY_REACTION_ID - ) - SELECT DISTINCT - pr.PATHWAY_REACTION_ID as 
reaction_id - , pr.SOURCE_ID as reaction_source_id - , ed.NAME as ext_db_name - , edr.VERSION as ext_db_version - , cast(pn.DISPLAY_LABEL as varchar(20)) as enzyme - , min(rep.is_reversible) as is_reversible - , min(rep.type) as type - , string_agg (pc.type, ',' order by p.pathway_id) as type_list - , coalesce(ca.compound_name, pc.compound_source_id) as compound - , CASE - WHEN coalesce(pc.CHEBI_ACCESSION, pc.compound_source_id) LIKE 'CHEBI%' - THEN '' || coalesce(ca.compound_name, pc.compound_source_id) || '' - ELSE coalesce(pc.chebi_accession, pc.compound_source_id) - END as compound_url - , CASE (replace (replace (ed.name, 'Pathways_', ''), '_RSRC', '')) - WHEN 'KEGG' THEN 'https://www.genome.jp/dbget-bin/www_bget?rn:' || pr.source_id - WHEN 'MetaCyc' THEN 'https://metacyc.org/META/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id - WHEN 'TrypanoCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/TRYPANO/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id - WHEN 'LeishCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/LEISH/new-image?tyrp=REACTION' || chr(38) || 'object=' || pr.source_id - WHEN 'FungiCyc' THEN NULL - END as reaction_url - FROM - sres.pathway p - , apidb.pathwayreaction pr - , APIDB.PATHWAYREACTIONREL prr - , SRES.PATHWAYNODE pn - , SRES.PATHWAYRELATIONSHIP prel - , SRES.EXTERNALDATABASE ed - , SRES.EXTERNALDATABASERELEASE edr - , SRES.ONTOLOGYTERM ot - , rep - , PathwayCompounds pc - LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id - WHERE p.PATHWAY_ID = prr.PATHWAY_ID - AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID - AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID - AND prel.NODE_ID = pn.PATHWAY_NODE_ID - AND ot.name = 'enzyme' - AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID - AND pc.EXT_DB_NAME = ed.NAME - AND pc.EXT_DB_VERSION = edr.VERSION - AND ed.EXTERNAL_DATABASE_ID = edr.EXTERNAL_DATABASE_ID - AND pc.PATHWAY_ID = p.PATHWAY_ID - AND pc.REACTION_id = 
pr.PATHWAY_REACTION_ID - AND rep.reaction_id = pr.pathway_reaction_id - AND rep.reaction_source_id = pr.source_id - AND rep.compound = coalesce(ca.compound_name, pc.compound_source_id) - AND rep.enzyme = pn.DISPLAY_LABEL - AND rep.is_reversible_og = prel.is_reversible - GROUP BY pr.pathway_reaction_id, pr.SOURCE_ID, ed.NAME, edr.VERSION, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE - , coalesce(pc.chebi_accession, pc.compound_source_id) - , coalesce(ca.compound_name, pc.compound_source_id) - ) t1 - GROUP BY reaction_id, reaction_source_id, reaction_url, ext_db_name, ext_db_version, enzyme, is_reversible - ) t2 - ) i - LEFT OUTER JOIN sres.enzymeclass ec ON i.enzyme = ec.ec_number - ) o - ]]> - - - - - - - - - Nodes and edges for pathway maps - - - - - - - - - - - - - - - - - - - - - - - 1 - ) - SELECT aee.e_id, pn.* - FROM pn - , AllEnzymeEdges aee - WHERE aee.all_edges = pn.all_edges - ]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - Used by pathway table on gene pages - - - - - - - - - - - - - - - for use in GenesByReactionCompounds question - - - - - - - - - - Each row stores mass-spec. based expression evidence for one sample of - one experiment for one gene. Used for mass spec queries in the model, - GBrowse, and PBrowse, and also in the creation of the MSTranscriptSummary - tuning table. - - - - - - - - - - - - - - - - - - - - - - - - - - - - Stores summary information from annotated genomes to facilitate overview section of gene page - - - - - - - - - - - - - - Mass-spec experiment results for a transcript. Used in the model for queries - related to transcripts. - - - - - - - - - - - - - Mass-spec experiment results for a peptide. Used by the model, GBrowse, - and PBrowse. - - - - - - - - - - - - - - - - - - - - - - - - - Data from the Seattle Structural Genomics Center for Infectious Disease, - populated from their web service. Used in the gene record. 
- - - - - - - - Used by the model and GBrowse, as well as an input in the creation of - the tuning tables like MSModifiedPeptideSummary and MSPeptideSummary. - - - - - - - - Used by the model when writing profile data - - - - - - - - Associates an organism with the GBrowse and PBrowse tracks available - for it. Used by the gene record. - - - - - - - - - Associates an organism with the GBrowse and PBrowse tracks available - for it. Used by the gene record. - - - - - - - - - Each row maps a dataset onto an ID for which the dataset contains data; - each dataset gets one such row. - Used in dataset record queries. - - - - - - - - - - - - - - - - - - - Citation info for proteomics datasets, used by GBrowse - - - - ' || sample || '

' as sample_i - FROM MSPeptideSummary mps, DatasetPresenter ds - -- consider using the tuning table ExternalDbDatasetPresenter instead of the LIKE below, if its performance is a problem - WHERE (ds.name = mps.external_database_name or mps.external_database_name like ds.dataset_name_pattern) - ) t - group by name, id - ) - SELECT name, - substr(description, 4000, 1) || ' Primary Contact Email: '|| coalesce(email, 'unavailable') - || ' PMID: ' || publications || '

Samples:

' - || sample_table || chr(10) || - ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation - FROM ( - SELECT ds.name as name, ds.summary as description, pubs.contact_email as email, - pubs.pmids as publications, samples.sample_table as sample_table - FROM DatasetPresenter ds, pubs, samples - WHERE ds.dataset_presenter_id = pubs.id - AND ds.dataset_presenter_id = samples.id - ) t - ]]> -
-
- - - - - - - - - - = 1.5 - ) t - GROUP BY gene_source_id, project_id, sequence_id, haplotype_block_name, - start_min, end_max, start_max, end_min, organism - ]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - = commit_after THEN - COMMIT; - ctrows := 0; - END IF; - END LOOP; - commit; - END; - $$ LANGUAGE PLPGSQL; - ]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - For all datasets, list all genes (source_id) of a gene_group where one of them (profile_graph_id) - has data for a profile_set. - - - - - - - - - - - - - - - for gene-page expression graphs - - - - - - - - - - - - - - - - - - - - - For each distinct organism in GeneAttributes, all ancestors in the taxon tree. For the gene page. - - - - - - - - - - - - - - - - Materialization of GeneTables.GeneModelDump. - - - - - - - - + LOOP + ctrows := ctrows + 1; + INSERT INTO Profile&1 + (DATASET_NAME, DATASET_TYPE, DATASET_SUBTYPE, PROFILE_TYPE, NODE_TYPE, SOURCE_ID, PROFILE_STUDY_ID, PROFILE_SET_NAME, + PROFILE_SET_SUFFIX, PROFILE_AS_STRING, MAX_VALUE, MIN_VALUE, MAX_TIMEPOINT, MIN_TIMEPOINT) + VALUES + (pf_rows.DATASET_NAME, pf_rows.DATASET_TYPE, pf_rows.DATASET_SUBTYPE, pf_rows.PROFILE_TYPE, pf_rows.NODE_TYPE, pf_rows.SOURCE_ID, pf_rows.PROFILE_STUDY_ID, pf_rows.PROFILE_SET_NAME, + pf_rows.PROFILE_SET_SUFFIX, pf_rows.PROFILE_AS_STRING, pf_rows.MAX_VALUE, pf_rows.MIN_VALUE, pf_rows.MAX_TIMEPOINT, pf_rows.MIN_TIMEPOINT); + IF ctrows >= commit_after THEN + COMMIT; + ctrows := 0; + END IF; + END LOOP; + commit; + END; + $$ LANGUAGE PLPGSQL; + ]]> - - - - - - - Distinct filter_name for gene query summary. For the initial version, - at least, it's (GeneAttriutes.species UNION GeneAttributes.organism). - Note that the UNION implies set bahavior and therefore distinctness. 
- - + UPDATE Profile&1 + SET dataset_name = 'tbruTREU927_Rijo_Circadian_Regulation_rnaSeq_RSRC' + WHERE dataset_name= 'tbruTREU927_RNASeq_Rijo_Circadian_Regulation_RSRC' + ]]> - - - Info from dots.ExternalAaSequence records for BLAT alignments - - - - - + + - - - Materialization of the orthology transform. Also useful for GeneTables.Orthologs. - - - - - - - - = ga.start_min - and sg.start_min <= ga.end_max - ]]> - - - - - - - - - - + + + + + + + - - - - - For each RNA-Seq Dataset, compute the top 500 ratios of max/min gene expression. - - - - - - - - - - - - for GeneTables.RodMalPhenotypeTable + + For all datasets, list all genes (source_id) of a gene_group where one of them (profile_graph_id) + has data for a profile_set. - - - - + + results.source_id - OR knockdown.source_id is null - ORDER BY results.source_id, results.rmgmid - ]]> + create table GeneGroupProfile&1 as + select distinct other_gene.source_id, p.dataset_name, + this_gene.source_id as profile_graph_id + from OrthologousTranscripts ot + , Profile p + , GeneAttributes this_gene + , GeneAttributes other_gene + where p.source_id = ot.source_id + and ot.source_id = this_gene.source_id + and ot.ortho_gene_source_id = other_gene.source_id + and this_gene.species = other_gene.species + and ot.is_syntenic = 1 + union + select ga.source_id, p.dataset_name, p.source_id as profile_graph_id + from Profile p, GeneAttributes ga + where p.source_id = ga.source_id + ]]> - - - Chromosome data for CNV queries - - - - - - - - - - - - - - - - - Gene Data for CNV queries - - - + + + for gene-page expression graphs + + + + + + + + + CREATE TABLE ExpressionGraphsData&1 as + -- RNASeq + SELECT distinct ga.source_id, dnt.dataset_presenter_id as dataset_id, + cast(case when ps.profile_set_suffix is null then ps.protocol_app_node_name + when ps.protocol_app_node_name ='value' then ps.profile_set_suffix + else ps.protocol_app_node_name || ' - ' || ps.profile_set_suffix end || + case when substr(dp.value, 1, 10) in ('0', 
'false') and pan.name like '% firststrand %' then '- sense' + when substr(dp.value, 1, 10) in ('0', 'false') and pan.name like '% secondstrand %' then '- antisense' + when substr(dp.value, 1, 10) in ('1', 'true') and pan.name like '% firststrand %' then '- antisense' + when substr(dp.value, 1, 10) in ('1', 'true') and pan.name like '% secondstrand %' then '- sense' + else '' end || + case when pan.name like '% unique%' then ' - unique' + else '' end + as varchar(300)) AS sample_name, + round(nfe.value::numeric, 2) as value, + round(nfe.standard_error::numeric, 2) as standard_error, + round(nfe.percentile_channel1::numeric, 2) as percentile_channel1, + round(nfe.percentile_channel2::numeric, 2) as percentile_channel2, + ps.node_order_num, + ps.protocol_app_node_id + FROM results.NaFeatureExpression nfe, GeneAttributes ga, study.ProtocolAppNode pan, + ProfileSamples ps, study.nodeSet s, + sres.ExternalDatabaseRelease r, sres.ExternalDatabase d, + DatasetPresenter dnt, DatasetProperty dp + WHERE ga.na_feature_id = nfe.na_feature_id + AND nfe.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id = ps.protocol_app_node_id + AND ps.study_id = s.node_set_id + AND s.external_database_release_id = r.external_database_release_id + AND r.external_database_id = d.external_database_id + AND ps.dataset_name = dnt.name + AND dnt.dataset_presenter_id = dp.dataset_presenter_id + AND dp.property = 'switchStrandsProfiles' + AND ps.study_name not like '%cuff%' + AND ps.study_name not like '%htseq-intersection-nonempty%' + AND ps.study_name not like '%htseq-intersection-strict%' + UNION + -- Splice Site data + SELECT ga.source_id, dnt.dataset_presenter_id as dataset_id, + cast (case when ps.profile_set_suffix is null + then ps.protocol_app_node_name + else ps.protocol_app_node_name || ' ' || ps.profile_set_suffix + end as varchar(300)) AS sample_name, + round(nfe.value::numeric, 2) as value, + round(nfe.standard_error::numeric, 2) as standard_error, + 
round(nfe.percentile_channel1::numeric, 2) as percentile_channel1, + round(nfe.percentile_channel2::numeric, 2) as percentile_channel2, + ps.node_order_num, + ps.protocol_app_node_id + FROM results.NaFeatureExpression nfe, GeneAttributes ga, study.ProtocolAppNode pan, + ProfileSamples ps, study.nodeSet s, + sres.ExternalDatabaseRelease r, sres.ExternalDatabase d, + DatasetNameTaxon dnt,DatasetProperty dp + WHERE ga.na_feature_id = nfe.na_feature_id + AND nfe.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id = ps.protocol_app_node_id + AND ps.study_id = s.node_set_id + AND s.external_database_release_id = r.external_database_release_id + AND r.external_database_id = d.external_database_id + AND d.name = dnt.name + AND dnt.dataset_presenter_id = dp.dataset_presenter_id + AND dp.property = 'type' and substr(dp.value, 1, 10) = 'Splice Site' + UNION + -- microarray expression, quantitative proteomics, and eQTL + SELECT ga.source_id, dnt.dataset_presenter_id as dataset_id, + cast (case when ps.profile_set_suffix is null + then ps.protocol_app_node_name + else ps.protocol_app_node_name || ' ' || ps.profile_set_suffix + end as varchar(300)) AS sample_name, + CASE WHEN (d.NAME ='pfal3D7_quantitativeMassSpec_Apicoplast_ER_RSRC') + THEN round(nfe.VALUE::numeric, 6) + ELSE round(nfe.value::numeric, 2) END as value, + round(nfe.standard_error::numeric, 2) as standard_error, + round(nfe.percentile_channel1::numeric, 2) as percentile_channel1, + round(nfe.percentile_channel2::numeric, 2) as percentile_channel2, + ps.node_order_num, + ps.protocol_app_node_id + FROM results.NaFeatureExpression nfe, GeneAttributes ga, study.ProtocolAppNode pan, + ProfileSamples ps, study.nodeSet s, + sres.ExternalDatabaseRelease r, sres.ExternalDatabase d, + DatasetNameTaxon dnt, DatasetPresenter dsp + WHERE ga.na_feature_id = nfe.na_feature_id + AND nfe.protocol_app_node_id = pan.protocol_app_node_id + AND pan.protocol_app_node_id = ps.protocol_app_node_id + AND 
ps.study_id = s.node_set_id + AND s.external_database_release_id = r.external_database_release_id + AND r.external_database_id = d.external_database_id + AND d.name = dnt.name + AND dnt.dataset_presenter_id = dsp.dataset_presenter_id + AND dsp.subtype != 'rnaseq' + ]]> + ]]> @@ -8690,90 +2503,6 @@ WHERE blat.query_na_sequence_id = etn.na_sequence_id - - for TranscriptAttributes.InterproColumns - - - - - - - - - - - - - - - - annotation updates from Apollo @@ -8783,37 +2512,6 @@ WHERE blat.query_na_sequence_id = etn.na_sequence_id - - semicolon-delimited list of formatted genomic locations for each gene - - - - - - - - - @@ -9017,32 +2715,6 @@ sub readClob { --> - - - One phylogenetic-profile string per ortholog group - - - - - - - - - - - @@ -9204,252 +2876,6 @@ sub readClob { - - - Links AlphaFold entries to gene ids where Uniprot ids are directly assigned - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Stores intron support for introns - - - - - - - - = CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END - AND (gij.contained = 0 or gij.percent_max >= 2 /*stats.perc0005_annot_percent_max*/) - ) t - GROUP BY gene_source_id, ontology_term, intron_count - ) t - WHERE string_value = 'All' - - UNION - - SELECT gene_source_id - , ontology_term - , case when count(*) = intron_count THEN 'All-high' - when count(*) = 0 THEN 'None' - else 'Any-high' end as string_value - FROM ( - SELECT gij.gene_source_id - , 'intron_junction' as ontology_term - , intronCount.intron_count - FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats - , (SELECT count (*) as intron_count, source_id FROM apidbtuning.genemodeldump WHERE type = 'Intron' GROUP BY source_id) intronCount - WHERE gij.gene_source_id = intronCount.source_id - AND gij.na_sequence_id = stats.na_sequence_id - AND gij.annotated_intron = 'Yes' - AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2 - AND gij.total_unique >= CASE 
WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END - AND (gij.contained = 0 or gij.percent_max >= 2) - ) t2 - GROUP BY gene_source_id, ontology_term, intron_count - - UNION - - SELECT gene_source_id - , ontology_term - , CASE WHEN count(*) = intron_count THEN 'All-low' - WHEN count(*) = 0 THEN 'None' - ELSE 'Any-low' END as string_value - FROM ( - SELECT gij.gene_source_id - , 'intron_junction' as ontology_term - , intronCount.intron_count - FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats - , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount - WHERE gij.gene_source_id = intronCount.source_id - AND gij.na_sequence_id = stats.na_sequence_id - AND gij.annotated_intron = 'Yes' - AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4 - AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END - AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max) - AND gij.intron_feature_id not in ( - SELECT gij.intron_feature_id - FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats - WHERE gij.na_sequence_id = stats.na_sequence_id - AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2 - AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END - AND (gij.contained = 0 or gij.percent_max >= 2) - ) - ) t3 - GROUP BY gene_source_id, ontology_term, intron_count - - UNION - - SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-low') as string_value - FROM ( - SELECT gene_source_id - , ontology_term - , case when count(*) = intron_count THEN 'All' - when count(*) = 0 THEN 'None' - else 'Any' end as string_value - FROM ( - SELECT gij.gene_source_id - , 'intron_junction' as ontology_term - , intronCount.intron_count - FROM 
apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats - , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount - WHERE gij.gene_source_id = intronCount.source_id - AND gij.na_sequence_id = stats.na_sequence_id - AND gij.annotated_intron = 'Yes' - AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4 - AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END - AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max) - AND gij.intron_feature_id not in ( - SELECT gij.intron_feature_id - FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats - WHERE gij.na_sequence_id = stats.na_sequence_id - AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2 - AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END - AND (gij.contained = 0 or gij.percent_max >= 2) - ) - ) t - GROUP BY gene_source_id, ontology_term, intron_count - ) t4 - WHERE string_value = 'All' - ) t -]]> - - - all products for each gene diff --git a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt index d7d5687d76..1806c9dd44 100644 --- a/Model/lib/xml/tuningManager/tablePruning.txt +++ b/Model/lib/xml/tuningManager/tablePruning.txt @@ -56,7 +56,7 @@ R R R MO (replace study table with nodeset) -MO (replace study table with nodeset) +MO FIX (replace study table with nodeset) MO R (never used but possibly should add back?) 
R @@ -127,7 +127,7 @@ R (transcript attributes query needs to use n K MO K -MO (rm auto_lob; don't need to loop over chunks in postgres) +MO FIX (rm auto_lob; don't need to loop over chunks in postgres) MC K K From 4e14421c572ef0b5e11900f42d5725062c983f24 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 20 May 2025 14:47:40 -0400 Subject: [PATCH 028/112] mv pathwaynodes --- Model/lib/psql/webready/{global => orgSpecific}/PathwayNodes.psql | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Model/lib/psql/webready/{global => orgSpecific}/PathwayNodes.psql (100%) diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodes.psql similarity index 100% rename from Model/lib/psql/webready/global/PathwayNodes.psql rename to Model/lib/psql/webready/orgSpecific/PathwayNodes.psql From 82ce2967cbf8b49029c7c1102466676f27465342 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 20 May 2025 14:51:39 -0400 Subject: [PATCH 029/112] mv pathwaynodes --- Model/lib/psql/webready/{orgSpecific => global}/PathwayNodes.psql | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Model/lib/psql/webready/{orgSpecific => global}/PathwayNodes.psql (100%) diff --git a/Model/lib/psql/webready/orgSpecific/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql similarity index 100% rename from Model/lib/psql/webready/orgSpecific/PathwayNodes.psql rename to Model/lib/psql/webready/global/PathwayNodes.psql From cfcb7e626b542f355c149d3b92ccf663b58f4ade Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 20 May 2025 16:49:25 -0400 Subject: [PATCH 030/112] adjust PathwayNodes --- Model/lib/psql/webready/global/PathwayNodes.psql | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql index 87a255e6ae..781582fdf1 100644 --- a/Model/lib/psql/webready/global/PathwayNodes.psql +++ 
b/Model/lib/psql/webready/global/PathwayNodes.psql @@ -13,7 +13,6 @@ , pn.height , pn.cellular_location , ot.name AS type - , NULL AS gene_count , NULL AS default_structure FROM sres.pathwaynode pn INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id @@ -32,12 +31,10 @@ , pn.height , pn.cellular_location , ot.name AS type - , count (tp.gene_source_id) as gene_count , NULL AS default_structure FROM sres.pathwaynode pn INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id LEFT JOIN sres.enzymeclass ec ON pn.row_id = ec.enzyme_class_id - LEFT JOIN :SCHEMA.PathwayNodeGene tp ON pn.pathway_node_id = tp.pathway_node_id WHERE ot.name = 'enzyme' GROUP BY pn.pathway_id , pn.display_label @@ -63,7 +60,6 @@ , pn.height , pn.cellular_location , ot.name AS type - , NULL AS gene_count , st.default_structure FROM sres.pathwaynode pn INNER JOIN sres.ontologyterm ot ON pn.pathway_node_type_id = ot.ontology_term_id @@ -262,7 +258,6 @@ , nodes_with_parents.parent , reaction_source_id , coalesce(type, nodes_with_parents.node_type) AS node_type - , pn.gene_count , pn.default_structure FROM ( SELECT e_id::varchar AS pathway_node_id From 5f6cd2cbf088895047d6e1b180a3f8751ee20b32 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 20 May 2025 17:38:50 -0400 Subject: [PATCH 031/112] fix pathwaynodes --- Model/lib/psql/webready/global/PathwayNodes.psql | 2 +- Model/lib/xml/tuningManager/tablePruning.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql index 781582fdf1..eef89059da 100644 --- a/Model/lib/psql/webready/global/PathwayNodes.psql +++ b/Model/lib/psql/webready/global/PathwayNodes.psql @@ -70,7 +70,7 @@ FROM chebi.structures s , (SELECT id , compound - FROM CompoundId + FROM :SCHEMA.CompoundId WHERE type IN ('same ID', 'child ID') ) n WHERE n.id = 'CHEBI:' || s.compound_id diff --git 
a/Model/lib/xml/tuningManager/tablePruning.txt b/Model/lib/xml/tuningManager/tablePruning.txt index 1806c9dd44..11012b11d9 100644 --- a/Model/lib/xml/tuningManager/tablePruning.txt +++ b/Model/lib/xml/tuningManager/tablePruning.txt @@ -1,4 +1,4 @@ -MG +K FIXFIX MO ?? ?? From 8cbe278745f17e8250284db3e940fad7cb85e532 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 20 May 2025 21:33:56 -0400 Subject: [PATCH 032/112] clean comparative and global --- .../webready/comparative/AlphaFoldGenes.psql | 40 ++++++++---------- .../comparative/AlphaFoldGenes_ix.psql | 3 ++ .../comparative/GroupPhylogeneticProfile.psql | 10 +---- .../GroupPhylogeneticProfile_ix.psql | 3 ++ .../comparative/OrthologousTranscripts.psql | 41 ++++--------------- .../OrthologousTranscripts_ix.psql | 12 ++++++ .../webready/global/CompoundAttributes.psql | 4 +- .../lib/psql/webready/global/CompoundId.psql | 2 +- .../webready/global/PathwayReactions.psql | 2 +- .../webready/global/SequenceAttributes.psql | 33 +-------------- .../global/SequenceAttributes_ix.psql | 31 ++++++++++++++ 11 files changed, 81 insertions(+), 100 deletions(-) create mode 100644 Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql create mode 100644 Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql create mode 100644 Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql create mode 100644 Model/lib/psql/webready/global/SequenceAttributes_ix.psql diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql index 164c150fcb..2e9c2c7e34 100644 --- a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql +++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql @@ -1,6 +1,6 @@ - - - CREATE TABLE uniprotGenes AS + drop table if exists :SCHEMA.uniprotgenes; + + CREATE TABLE :SCHEMA.uniprotGenes AS SELECT DISTINCT ed.name , d.* , edr.version @@ -18,7 +18,7 @@ , sres.externaldatabaserelease edr , dots.dbrefaafeature db , 
dots.aafeature aa - , ProteinAttributes pa + , :SCHEMA.ProteinAttributes pa WHERE (ed.name = 'Uniprot/SWISSPROT' OR ed.name = 'Uniprot/SPTREMBL') AND (edr.version = 'xrefuniparc' OR edr.version = 'xref_sprot_blastp' OR edr.version = 'xref_trembl_blastp') AND edr.external_database_id = ed.external_database_id @@ -40,7 +40,7 @@ , sres.externaldatabaserelease edr , dots.dbrefnafeature db , dots.nafeature na - , TranscriptAttributes ta + , :SCHEMA.TranscriptAttributes ta WHERE ed.name like '%_dbxref_%niprot_%RSRC' AND edr.external_database_id = ed.external_database_id AND d.external_database_release_id = edr.external_database_release_id @@ -50,37 +50,34 @@ ; - + drop table if exists :SCHEMA.minrank - CREATE UNLOGGED TABLE minRank AS ( + CREATE UNLOGGED TABLE :SCHEMA.minRank AS ( SELECT gene_source_id , MIN(rank) as min_rank - FROM uniprotGenes upg + FROM :SCHEMA.uniprotGenes upg WHERE hit_length is not null GROUP BY gene_source_id ) ; - + drop table if exists :SCHEMA.alphafoldhits; - CREATE UNLOGGED TABLE alphaFoldHits AS ( + CREATE UNLOGGED TABLE :SCHEMA.alphaFoldHits AS ( SELECT DISTINCT gene_source_id , last_value(primary_identifier) over (PARTITION BY gene_source_id ORDER BY hit_length ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS uniprot_id FROM ( SELECT upg.* - FROM uniprotGenes upg - , minRank + FROM :SCHEMA.uniprotGenes upg + , :SCHEMA.minRank WHERE upg.gene_source_id = minRank.gene_source_id AND upg.rank = minRank.min_rank ) t ) - ; - - - CREATE TABLE AlphaFoldGenes AS ( + CREATE TABLE :SCHEMA.AlphaFoldGenes AS ( SELECT afh.gene_source_id , af.uniprot_id , af.source_id as alphafold_id @@ -88,16 +85,13 @@ , af.first_residue_index , af.last_residue_index FROM apidb.alphafold af - , alphaFoldHits afh + , :SCHEMA.alphaFoldHits afh WHERE afh.uniprot_id = af.uniprot_id ) ; - - - CREATE index AlphaFoldGenes_idx ON AlphaFoldGenes (gene_source_id, uniprot_id) - - - ; + drop table if exists :SCHEMA.uniprotgenes; + drop table if exists :SCHEMA.minrank 
+ drop table if exists :SCHEMA.alphafoldhits; diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql new file mode 100644 index 0000000000..df161ee060 --- /dev/null +++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql @@ -0,0 +1,3 @@ + CREATE index AlphaFoldGenes_idx ON AlphaFoldGenes (gene_source_id, uniprot_id) + + ; diff --git a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql index a7d484850c..ddb118b5e4 100644 --- a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql +++ b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql @@ -1,10 +1,9 @@ - - CREATE table GroupPhylogeneticProfile as + CREATE table :SCHEMA.GroupPhylogeneticProfile as SELECT rep.orthomcl_name, pp.profile_string FROM apidb.PhylogeneticProfile pp, (SELECT orthomcl_name, max(source_id) as source_id - FROM GeneAttributes + FROM :SCHEM.GeneAttributes GROUP BY orthomcl_name) rep WHERE rep.source_id = pp.source_id @@ -12,9 +11,4 @@ - create index group_pp_ix - on GroupPhylogeneticProfile (orthomcl_name) - - - ; diff --git a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql new file mode 100644 index 0000000000..1753b0f2db --- /dev/null +++ b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql @@ -0,0 +1,3 @@ + create index group_pp_ix + on :SCHEMA.GroupPhylogeneticProfile (orthomcl_name) + ; diff --git a/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql index d6e7191d1e..e4766f036f 100644 --- a/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql +++ b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql @@ -1,25 +1,19 @@ + drop table if exists :SCHEMA.SyntenicPairs; - - create UNLOGGED table 
SyntenicPairs as + create UNLOGGED table :SCHEMA.SyntenicPairs as select distinct ga.na_feature_id, sg.syn_na_feature_id - from apidb.SyntenicGene sg, GeneAttributes ga + from apidb.SyntenicGene sg, :SCHEMA.GeneAttributes ga where sg.na_sequence_id = ga.na_sequence_id and sg.end_max >= ga.start_min and sg.start_min <= ga.end_max ; - - create index SynPair_idx - on SyntenicPairs (na_feature_id, syn_na_feature_id) - - + on :SCHEMA.SyntenicPairs (na_feature_id, syn_na_feature_id) ; - - - create table OrthologousTranscripts as + create table :SCHEMA.OrthologousTranscripts as with all_pairs as (select ga.source_id , ga.project_id @@ -34,8 +28,8 @@ , ota.organism as ortho_organism , ota.taxon_id as ortho_taxon_id , o.is_reference_strain - from Geneattributes ga - , TranscriptAttributes ota + from :SCHEMA.Geneattributes ga + , :SCHEMA.TranscriptAttributes ota , apidb.Organism o where ga.ORTHOMCL_NAME = ota.ORTHOMCL_NAME and ota.taxon_id = o.taxon_id @@ -49,25 +43,6 @@ left join syn_pairs on all_pairs.na_feature_id = syn_pairs.na_feature_id and all_pairs.ortho_na_feature_id = syn_pairs.syn_na_feature_id - - ; - - - - create index ot_idx - on OrthologousTranscripts (source_id, project_id, is_syntenic desc, ortho_source_id, - ortho_project_id, ortho_gene_source_id, ortho_product, - ortho_name, ortho_organism, ortho_taxon_id, is_reference_strain) - - - ; - - - - create index ot_smol_idx - on OrthologousTranscripts (is_syntenic, ortho_taxon_id, source_id, ortho_source_id, - ortho_project_id, ortho_gene_source_id) - - ; + drop table if exists :SCHEMA.SyntenicPairs; diff --git a/Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql b/Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql new file mode 100644 index 0000000000..677049f727 --- /dev/null +++ b/Model/lib/psql/webready/comparative/OrthologousTranscripts_ix.psql @@ -0,0 +1,12 @@ + + create index ot_idx + on :SCHEMA.OrthologousTranscripts (source_id, project_id, is_syntenic desc, 
ortho_source_id, + ortho_project_id, ortho_gene_source_id, ortho_product, + ortho_name, ortho_organism, ortho_taxon_id, is_reference_strain) + ; + + create index ot_smol_idx + on :SCHEMA.OrthologousTranscripts (is_syntenic, ortho_taxon_id, source_id, ortho_source_id, + ortho_project_id, ortho_gene_source_id) + ; + diff --git a/Model/lib/psql/webready/global/CompoundAttributes.psql b/Model/lib/psql/webready/global/CompoundAttributes.psql index 22f54648e9..aa622e1f39 100644 --- a/Model/lib/psql/webready/global/CompoundAttributes.psql +++ b/Model/lib/psql/webready/global/CompoundAttributes.psql @@ -9,8 +9,8 @@ , p.secondary_ids , string_agg(childc.formula, ';' ORDER BY childc.formula) AS formula , avg(childc.mass::numeric) AS mass - FROM CompoundProperties p - , (SELECT id, parent_id, other_names, iupac_name, syn, mass, formula FROM CompoundProperties ) childc + FROM :SCHEMA.CompoundProperties p + , (SELECT id, parent_id, other_names, iupac_name, syn, mass, formula FROM :SCHEMA.CompoundProperties ) childc WHERE p.parent_id IS NULL AND ( p.ID = childc.parent_id OR p.ID = childc.ID ) GROUP BY p.ID, p.source_id, p.compound_name, p.definition, p.secondary_ids diff --git a/Model/lib/psql/webready/global/CompoundId.psql b/Model/lib/psql/webready/global/CompoundId.psql index ca4a9c9c60..9cd80c05a2 100644 --- a/Model/lib/psql/webready/global/CompoundId.psql +++ b/Model/lib/psql/webready/global/CompoundId.psql @@ -3,7 +3,7 @@ FROM :SCHEMA.CompoundAttributes UNION SELECT p.source_id AS id, ca.source_id AS compound, 'child ID' AS type, '' as source - FROM :SCHEMA.CompoundAttributes ca, CompoundProperties p + FROM :SCHEMA.CompoundAttributes ca, :SCHEMA.CompoundProperties p WHERE ca.id = p.parent_id UNION SELECT da.accession_number AS id, p.source_id AS compound, 'KEGG' AS type, '' as source diff --git a/Model/lib/psql/webready/global/PathwayReactions.psql b/Model/lib/psql/webready/global/PathwayReactions.psql index 5787e17267..46e140b408 100644 --- 
a/Model/lib/psql/webready/global/PathwayReactions.psql +++ b/Model/lib/psql/webready/global/PathwayReactions.psql @@ -100,7 +100,7 @@ , SRES.ONTOLOGYTERM ot , rep , :SCHEMA.PathwayCompounds pc - LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id + LEFT JOIN :SCHEMA.CompoundAttributes ca ON pc.chebi_accession = ca.source_id WHERE p.PATHWAY_ID = prr.PATHWAY_ID AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID diff --git a/Model/lib/psql/webready/global/SequenceAttributes.psql b/Model/lib/psql/webready/global/SequenceAttributes.psql index 4cba9b09fb..77e8aedba8 100644 --- a/Model/lib/psql/webready/global/SequenceAttributes.psql +++ b/Model/lib/psql/webready/global/SequenceAttributes.psql @@ -93,39 +93,8 @@ - alter table SequenceAttributes + alter table :SCHEMA.SequenceAttributes add constraint SeqAttrs_pk primary key (full_id) ; - - - create unique index SeqAttrs_idx ON SequenceAttributes (full_id, group_name, taxon_id, source_id) - - ; - - - - create unique index SeqAttrs_gusIdx ON SequenceAttributes (ortholog_group_id, aa_sequence_id) - - ; - - - - create unique index SeqAttrs_idx2 ON SequenceAttributes (group_name, length desc, full_id, taxon_id) - - ; - - - - create unique index SeqAttrs_idx3 - on SequenceAttributes (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id) - - ; - - - - create unique index SeqAttrs_idx4 ON SequenceAttributes (source_id, full_id, group_name, taxon_id) - - ; - diff --git a/Model/lib/psql/webready/global/SequenceAttributes_ix.psql b/Model/lib/psql/webready/global/SequenceAttributes_ix.psql new file mode 100644 index 0000000000..88e66acdde --- /dev/null +++ b/Model/lib/psql/webready/global/SequenceAttributes_ix.psql @@ -0,0 +1,31 @@ + + + create unique index SeqAttrs_idx ON :SCHEMA.SequenceAttributes (full_id, group_name, taxon_id, source_id) + + ; + + + + create unique index SeqAttrs_gusIdx ON :SCHEMA.SequenceAttributes 
(ortholog_group_id, aa_sequence_id) + + ; + + + + create unique index SeqAttrs_idx2 ON :SCHEMA.SequenceAttributes (group_name, length desc, full_id, taxon_id) + + ; + + + + create unique index SeqAttrs_idx3 + on :SCHEMA.SequenceAttributes (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id) + + ; + + + + create unique index SeqAttrs_idx4 ON :SCHEMA.SequenceAttributes (source_id, full_id, group_name, taxon_id) + + ; + From 0141db3799eefcc7d389fd4d27845d2c207dd241 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Wed, 21 May 2025 14:17:03 -0400 Subject: [PATCH 033/112] fix index schema --- Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql | 2 +- Model/lib/psql/webready/global/CompoundAttributes_ix.psql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql index df161ee060..5a938f8616 100644 --- a/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql +++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes_ix.psql @@ -1,3 +1,3 @@ - CREATE index AlphaFoldGenes_idx ON AlphaFoldGenes (gene_source_id, uniprot_id) + CREATE index AlphaFoldGenes_idx ON :SCHEMA.AlphaFoldGenes (gene_source_id, uniprot_id) ; diff --git a/Model/lib/psql/webready/global/CompoundAttributes_ix.psql b/Model/lib/psql/webready/global/CompoundAttributes_ix.psql index 697149da51..2cdd87d86e 100644 --- a/Model/lib/psql/webready/global/CompoundAttributes_ix.psql +++ b/Model/lib/psql/webready/global/CompoundAttributes_ix.psql @@ -1,2 +1,2 @@ - CREATE INDEX CompoundAttributes_idx ON CompoundAttributes (source_id) + CREATE INDEX CompoundAttributes_idx ON :SCHEMA.CompoundAttributes (source_id) ; From 7bea7835a1a33228ab20908a66d3cf1e71a8013d Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 10:08:10 -0400 Subject: [PATCH 034/112] optimize compprops and pathreact --- .../global/CompoundProperties_ix.psql | 2 + 
.../webready/global/PathwayReactions.psql | 115 ++++++++++-------- 2 files changed, 65 insertions(+), 52 deletions(-) create mode 100644 Model/lib/psql/webready/global/CompoundProperties_ix.psql diff --git a/Model/lib/psql/webready/global/CompoundProperties_ix.psql b/Model/lib/psql/webready/global/CompoundProperties_ix.psql new file mode 100644 index 0000000000..46bd882ae0 --- /dev/null +++ b/Model/lib/psql/webready/global/CompoundProperties_ix.psql @@ -0,0 +1,2 @@ +CREATE INDEX CompoundProperties_idx1 ON :SCHEMA.CompoundProperties (id) + CREATE INDEX CompoundProperties_idx2 ON :SCHEMA.CompoundProperties (parent_id) \ No newline at end of file diff --git a/Model/lib/psql/webready/global/PathwayReactions.psql b/Model/lib/psql/webready/global/PathwayReactions.psql index 46e140b408..93e6e626c4 100644 --- a/Model/lib/psql/webready/global/PathwayReactions.psql +++ b/Model/lib/psql/webready/global/PathwayReactions.psql @@ -1,45 +1,6 @@ - CREATE TABLE :SCHEMA.PathwayReactions AS - SELECT o.* - , CASE WHEN o.expasy_url IS NOT NULL THEN '' || o.enzyme || '' ELSE o.enzyme END as expasy_html - FROM ( - SELECT i.* - , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-' - THEN - 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1=' - || ec.ec_number_1 - || CASE ec.ec_number_2 WHEN null THEN null ELSE chr(38) || 'field2=' || ec.ec_number_2 END - || CASE ec.ec_number_3 WHEN null THEN null ELSE chr(38) || 'field3=' || ec.ec_number_3 END - || CASE ec.ec_number_4 WHEN null THEN null ELSE chr(38) || 'field4=' || ec.ec_number_4 END - ELSE reaction_url END as expasy_url - , ec.description as enzyme_description - FROM ( - SELECT - reaction_id - , reaction_source_id - , reaction_url - , ext_db_name - , ext_db_version - , enzyme - , substrates_html || ' ' || sign || ' ' || products_html as equation_html - , substrates_text || ' ' || sign || ' ' || products_text as equation_text - , case when sign = '<=>' then 1 else 0 end as is_reversible - , substrates_text - , 
products_text - FROM ( - SELECT - reaction_id - , reaction_source_id - , reaction_url - , ext_db_name - , ext_db_version - , enzyme - , (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like '%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign - , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html - , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text - , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html - , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text - FROM ( - WITH rep AS ( + drop table if exists :SCHEMA.PR_rep; + + create unlogged table :SCHEMA.PR_rep as SELECT DISTINCT pr.PATHWAY_REACTION_ID as reaction_id , pr.SOURCE_ID as reaction_source_id @@ -65,15 +26,20 @@ AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID AND pc.PATHWAY_ID = p.PATHWAY_ID AND pc.REACTION_id = pr.PATHWAY_REACTION_ID - ) + ; + + + drop table if exists :SCHEMA.PR_t1; + + create unlogged table :SCHEMA.PR_t1 as SELECT DISTINCT pr.PATHWAY_REACTION_ID as reaction_id , pr.SOURCE_ID as reaction_source_id , ed.NAME as ext_db_name , edr.VERSION as ext_db_version , cast(pn.DISPLAY_LABEL as varchar(20)) as enzyme - , min(rep.is_reversible) as is_reversible - , min(rep.type) as type + , min(PR_rep.is_reversible) as is_reversible + , min(PR_rep.type) as type , string_agg (pc.type, ',' order by p.pathway_id) as type_list , coalesce(ca.compound_name, pc.compound_source_id) as compound , CASE @@ -98,7 +64,7 @@ , SRES.EXTERNALDATABASE ed , SRES.EXTERNALDATABASERELEASE edr , SRES.ONTOLOGYTERM ot - , rep + , :SCHEMA.pr_rep pr_rep , :SCHEMA.PathwayCompounds pc LEFT JOIN :SCHEMA.CompoundAttributes ca ON pc.chebi_accession = 
ca.source_id WHERE p.PATHWAY_ID = prr.PATHWAY_ID @@ -112,15 +78,58 @@ AND ed.EXTERNAL_DATABASE_ID = edr.EXTERNAL_DATABASE_ID AND pc.PATHWAY_ID = p.PATHWAY_ID AND pc.REACTION_id = pr.PATHWAY_REACTION_ID - AND rep.reaction_id = pr.pathway_reaction_id - AND rep.reaction_source_id = pr.source_id - AND rep.compound = coalesce(ca.compound_name, pc.compound_source_id) - AND rep.enzyme = pn.DISPLAY_LABEL - AND rep.is_reversible_og = prel.is_reversible + AND PR_rep.reaction_id = pr.pathway_reaction_id + AND PR_rep.reaction_source_id = pr.source_id + AND PR_rep.compound = coalesce(ca.compound_name, pc.compound_source_id) + AND PR_rep.enzyme = pn.DISPLAY_LABEL + AND PR_rep.is_reversible_og = prel.is_reversible GROUP BY pr.pathway_reaction_id, pr.SOURCE_ID, ed.NAME, edr.VERSION, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE , coalesce(pc.chebi_accession, pc.compound_source_id) , coalesce(ca.compound_name, pc.compound_source_id) - ) t1 + ; + + + CREATE TABLE :SCHEMA.PathwayReactions AS + SELECT o.* + , CASE WHEN o.expasy_url IS NOT NULL THEN '' || o.enzyme || '' ELSE o.enzyme END as expasy_html + FROM ( + SELECT i.* + , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-' + THEN + 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1=' + || ec.ec_number_1 + || CASE ec.ec_number_2 WHEN null THEN null ELSE chr(38) || 'field2=' || ec.ec_number_2 END + || CASE ec.ec_number_3 WHEN null THEN null ELSE chr(38) || 'field3=' || ec.ec_number_3 END + || CASE ec.ec_number_4 WHEN null THEN null ELSE chr(38) || 'field4=' || ec.ec_number_4 END + ELSE reaction_url END as expasy_url + , ec.description as enzyme_description + FROM ( + SELECT + reaction_id + , reaction_source_id + , reaction_url + , ext_db_name + , ext_db_version + , enzyme + , substrates_html || ' ' || sign || ' ' || products_html as equation_html + , substrates_text || ' ' || sign || ' ' || products_text as equation_text + , case when sign = '<=>' then 1 else 0 end as is_reversible + , substrates_text + , products_text 
+ FROM ( + SELECT + reaction_id + , reaction_source_id + , reaction_url + , ext_db_name + , ext_db_version + , enzyme + , (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like '%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign + , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html + , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text + , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html + , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text + FROM :SCHEMA.PR_t1 as t1 GROUP BY reaction_id, reaction_source_id, reaction_url, ext_db_name, ext_db_version, enzyme, is_reversible ) t2 ) i @@ -129,3 +138,5 @@ ; + drop table if exists :SCHEMA.PR_rep; + drop table if exists :SCHEMA.PR_t1; From 74d57189922f8ebc40d587786228536237708d6e Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 12:27:54 -0400 Subject: [PATCH 035/112] fix missing :SCHEMA --- Model/lib/psql/webready/global/PathwayNodes.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql index eef89059da..20b181f77c 100644 --- a/Model/lib/psql/webready/global/PathwayNodes.psql +++ b/Model/lib/psql/webready/global/PathwayNodes.psql @@ -126,7 +126,7 @@ SELECT string_agg(io, ',' ORDER BY io) AS all_edges , e_id , pathway_id - FROM EnzymeEdges + FROM :SCHEMA.EnzymeEdges GROUP BY pathway_id , e_id ) From 30e58841fa3193f4d0501cda09f8d2d78143857c Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 12:44:49 -0400 Subject: [PATCH 036/112] fix typos --- 
Model/lib/psql/webready/global/CompoundProperties_ix.psql | 5 +++-- Model/lib/psql/webready/global/PathwayAttributes_ix.psql | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Model/lib/psql/webready/global/CompoundProperties_ix.psql b/Model/lib/psql/webready/global/CompoundProperties_ix.psql index 46bd882ae0..5943ec2260 100644 --- a/Model/lib/psql/webready/global/CompoundProperties_ix.psql +++ b/Model/lib/psql/webready/global/CompoundProperties_ix.psql @@ -1,2 +1,3 @@ -CREATE INDEX CompoundProperties_idx1 ON :SCHEMA.CompoundProperties (id) - CREATE INDEX CompoundProperties_idx2 ON :SCHEMA.CompoundProperties (parent_id) \ No newline at end of file +CREATE INDEX CompoundProperties_idx1 ON :SCHEMA.CompoundProperties (id); + + CREATE INDEX CompoundProperties_idx2 ON :SCHEMA.CompoundProperties (parent_id); \ No newline at end of file diff --git a/Model/lib/psql/webready/global/PathwayAttributes_ix.psql b/Model/lib/psql/webready/global/PathwayAttributes_ix.psql index a5c4902674..492ee68829 100644 --- a/Model/lib/psql/webready/global/PathwayAttributes_ix.psql +++ b/Model/lib/psql/webready/global/PathwayAttributes_ix.psql @@ -1,4 +1,4 @@ - CREATE UNIQUE PathAttr_sourceId_pwaySrc + CREATE UNIQUE index PathAttr_sourceId_pwaySrc ON :SCHEMA.PathwayAttributes (source_id, pathway_source) ; From 48d2470daa966ca221e6364521bc73f17c7dba45 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 12:53:59 -0400 Subject: [PATCH 037/112] fix typo --- Model/lib/psql/webready/global/PathwayNodes.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql index 20b181f77c..406129eefb 100644 --- a/Model/lib/psql/webready/global/PathwayNodes.psql +++ b/Model/lib/psql/webready/global/PathwayNodes.psql @@ -134,7 +134,7 @@ SELECT pathway_id , all_edges , string_agg(e_id::varchar, '_' ORDER BY e_id) AS parent - FROM :SCHEMA.AllEnzymeEdges + FROM 
AllEnzymeEdges GROUP BY pathway_id , all_edges HAVING COUNT (*) > 1 From 36184a875d4ca15af061044d2b5ec4cb02fd1b44 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 12:58:39 -0400 Subject: [PATCH 038/112] fix typo --- Model/lib/psql/webready/global/PathwayNodes.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql index 406129eefb..6510bb9735 100644 --- a/Model/lib/psql/webready/global/PathwayNodes.psql +++ b/Model/lib/psql/webready/global/PathwayNodes.psql @@ -228,7 +228,7 @@ AND ot1.name != 'enzyme' AND ot2.name != 'enzyme' ) rel - , PathwayAttributes pa + , :SCHEMA.PathwayAttributes pa WHERE pa.pathway_id = rel.pathway_id ; From a4dbadb5d08d407153f8e00ff17929839c3b5f77 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 13:04:16 -0400 Subject: [PATCH 039/112] fix typo --- Model/lib/psql/webready/global/PathwayNodes.psql | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Model/lib/psql/webready/global/PathwayNodes.psql b/Model/lib/psql/webready/global/PathwayNodes.psql index 6510bb9735..bcd0979283 100644 --- a/Model/lib/psql/webready/global/PathwayNodes.psql +++ b/Model/lib/psql/webready/global/PathwayNodes.psql @@ -288,14 +288,14 @@ LEFT JOIN :SCHEMA.NodesWithTypes pn ON nodes_with_parents.pathway_node_id = pn.pathway_node_id::varchar ; -drop table :SCHEMA.NodesWithTypes; -drop table :SCHEMA.ReactionsWithReversibility; -drop table :SCHEMA.EnzymeEdges; -drop table :SCHEMA.ParentNodes; -drop table :SCHEMA.NodesWithParents; -drop table :SCHEMA.EnzymeReactions; -drop table :SCHEMA.ParentsForEdges; -drop table :SCHEMA.ParentsForEdges; +drop table if exists :SCHEMA.NodesWithTypes; +drop table if exists :SCHEMA.ReactionsWithReversibility; +drop table if exists :SCHEMA.EnzymeEdges; +drop table if exists :SCHEMA.ParentNodes; +drop table if exists :SCHEMA.NodesWithParents; +drop table 
if exists :SCHEMA.EnzymeReactions; +drop table if exists :SCHEMA.ParentsForEdges; + From 40543879bdf879df78a0c3aae6851b0bc50c2bd7 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 13:14:14 -0400 Subject: [PATCH 040/112] missing taxon. --- Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql index d00a1a5e8d..29964ad4b5 100644 --- a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql @@ -25,7 +25,7 @@ 1 as is_top_level, sequence.na_sequence_id, organism.genome_source, organism.name_for_filenames, coalesce(msa.has_msa, 0) as has_msa - FROM sres.Taxon LEFT JOIN apidb.Organism ON taxon_id = :TAXON_ID and taxon.taxon_id = organism.taxon_id, + FROM sres.Taxon LEFT JOIN apidb.Organism ON taxon.taxon_id = :TAXON_ID and taxon.taxon_id = organism.taxon_id, sres.OntologyTerm so, ( SELECT na_sequence_id, source_id, length, chromosome, chromosome_order_num, taxon_id, description, a_count, c_count, g_count, t_count, external_database_release_id, sequence_ontology_id From 7e684ca1717a728f7c41be0e3c13f991d9282cba Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 14:03:38 -0400 Subject: [PATCH 041/112] debug --- Model/lib/psql/webready/orgSpecific/GoTermSummary.psql | 2 +- Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql | 4 ++-- Model/lib/psql/webready/orgSpecific/PANIO.psql | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql index a233539f0c..3d2fb4507f 100644 --- a/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql +++ b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql @@ -49,7 +49,7 @@ AND 
edr.external_database_release_id = ot.external_database_release_id AND edr.external_database_id = ed.external_database_id AND ed.name ='GO_RSRC' - WHERE ggt.org_abbrev = ':ORG_ABBREV' + AND ggt.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; diff --git a/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql index c1359297b9..78bff1d74b 100644 --- a/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/IntronUtrCoords_ix.psql @@ -1,8 +1,8 @@ - CREATE INDEX :SCHEMA.iuc_srcid_ix + CREATE INDEX iuc_srcid_ix ON :SCHEMA.IntronUtrCoords (source_id, na_feature_id) ; - CREATE INDEX :SCHEMA.iuc_nfid_ix + CREATE INDEX iuc_nfid_ix ON :SCHEMA.IntronUtrCoords (na_feature_id, source_id) ; diff --git a/Model/lib/psql/webready/orgSpecific/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql index c7afcbeca5..7eefd21f03 100644 --- a/Model/lib/psql/webready/orgSpecific/PANIO.psql +++ b/Model/lib/psql/webready/orgSpecific/PANIO.psql @@ -9,7 +9,7 @@ out_type.source_id as output_pan_type_source_id, --out_type.name as output_pan_type, out_type.ontology_term_id as output_pan_type_id - FROM :SCHEMA.panextdbrls panExtDbRls, + FROM :SCHEMA.panextdbrls panExtDbRls INNER JOIN study.Input i on i.protocol_app_node_id = panExtDbRls.pan_id INNER JOIN study.ProtocolApp pa on i.protocol_app_id = pa.protocol_app_id INNER JOIN study.Output o AND o.protocol_app_id = pa.protocol_app_id From a94bc2db90808b9e3f246253b2caab643726f628 Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Thu, 22 May 2025 14:40:26 -0400 Subject: [PATCH 042/112] est tables to use is_reference and update notes --- .../webready/orgSpecific/EstAttributes.psql | 26 +++++++++++------ .../webready/orgSpecific/EstSequence.psql | 29 +++++++++++-------- Model/lib/xml/tuningManager/webtables.org | 1 + 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql 
b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql index 17da985e7f..c777a3a288 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql @@ -1,9 +1,8 @@ :CREATE_AND_POPULATE - - - CREATE TABLE EstAttributes AS SELECT - cast(apidb.project_id(tn.name) as varchar(20)) as project_id, + ':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, + current_timestamp as modification_date, ens.source_id, e.seq_primer AS primer, ens.a_count, @@ -26,13 +25,21 @@ l.library_id, replace(l.dbest_name, '''', '-') as library_dbest_name FROM dots.Est e, dots.Library l, sres.Taxon, sres.OntologyTerm oterm, sres.TaxonName tn, sres.ExternalDatabase ed, - sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens + sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens, + apidb.datasource ds, apidb.organism o LEFT JOIN (select query_na_sequence_id,max(ct) as best_alignment_count from ( SELECT query_na_sequence_id, COUNT(*) AS ct - FROM dots.BlatAlignment ba + FROM dots.BlatAlignment ba, apidb.datasource, apidb.organism o, + sres.externaldatabase d, sres.externaldatabaserelease r WHERE is_best_alignment = 1 + AND ba.query_external_db_release_id = r.external_database_release_id + AND r.external_database_id = d.external_database_id + AND ed.name = ds.name + AND ds.taxon_id = o.taxon_id + AND o.is_reference = 1 + AND o.taxon_id = :TAXON_ID GROUP BY target_external_db_release_id,query_na_sequence_id) t group by query_na_sequence_id ) best ON ens.na_sequence_id = best.query_na_sequence_id @@ -44,8 +51,9 @@ AND ens.external_database_release_id = edr.external_database_release_id AND edr.external_database_id = ed.external_database_id AND ens.sequence_ontology_id = oterm.ontology_term_id + AND ed.name = ds.name + and ds.taxon_id = o.taxon_id + and o.is_reference = 1 + and o.taxon_id = :TAXON_ID AND oterm.name = 'EST' - - :DECLARE_PARTITION; - diff --git a/Model/lib/psql/webready/orgSpecific/EstSequence.psql 
b/Model/lib/psql/webready/orgSpecific/EstSequence.psql index 9dc3effb71..e31eeca39c 100644 --- a/Model/lib/psql/webready/orgSpecific/EstSequence.psql +++ b/Model/lib/psql/webready/orgSpecific/EstSequence.psql @@ -1,16 +1,21 @@ :CREATE_AND_POPULATE - - - CREATE TABLE EstSequence AS - SELECT ens.source_id, - cast(apidb.project_id(tn.name) as varchar(20)) as project_id, - ens.sequence - FROM dots.ExternalNaSequence ens, sres.OntologyTerm oterm, sres.TaxonName tn + SELECT ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev + , current_timestamp as modification_date + , ens.source_id + , ens.sequence + FROM dots.ExternalNaSequence ens + , sres.OntologyTerm oterm + , apidb.organism o + , sres.externaldatabase d + , sres.externaldatabaserelease r + , apidb.datasource ds WHERE oterm.name = 'EST' AND oterm.ontology_term_id = ens.sequence_ontology_id - AND ens.taxon_id = tn.taxon_id - AND tn.name_class = 'scientific name' - - + AND ens.external_database_release_id = r.external_database_release_id + AND r.external_database_id = d.external_database_id + AND d.name = ds.name + AND ds.taxon_id = o.taxon_id + AND ds.taxon_id = :TAXON_ID + AND o.is_reference = 1 :DECLARE_PARTITION; - diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index fa0ecc2bde..0c80030b59 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -23,6 +23,7 @@ - [X] SequencePieceClosure - [X] GenomicSeqAttributes - [s] SequenceEnzymeClass + - Temp remove this and eventually Move to ComparativeGenomics because it depends on the OrthoMCL Derived EC Numbers - Transcript / Protein - [X] SignalPeptideDomains_ix.psql From 4005884f2e6ba056cda44cb8910d1425459ffc81 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 14:57:47 -0400 Subject: [PATCH 043/112] debug --- Model/lib/psql/webready/orgSpecific/GoTermSummary.psql | 3 ++- Model/lib/psql/webready/orgSpecific/PANIO.psql | 4 ++-- 2 files 
changed, 4 insertions(+), 3 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql index 3d2fb4507f..db5e946533 100644 --- a/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql +++ b/Model/lib/psql/webready/orgSpecific/GoTermSummary.psql @@ -21,7 +21,8 @@ ) gs ON ggt.go_term_id = gs.ontology_term_id WHERE ggt.org_abbrev = ':ORG_ABBREV' UNION - SELECT ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id, + SELECT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, + ggt.gene_source_id, ggt.transcript_source_id, ggt.aa_sequence_id, ggt.taxon_id, ggt.is_not, replace (substr(ot.source_id, 1, 25),'_',':') as go_id, ot.ontology_term_id as go_term_id, ggt.ontology, diff --git a/Model/lib/psql/webready/orgSpecific/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql index 7eefd21f03..ac61b072fa 100644 --- a/Model/lib/psql/webready/orgSpecific/PANIO.psql +++ b/Model/lib/psql/webready/orgSpecific/PANIO.psql @@ -1,5 +1,5 @@ :CREATE_AND_POPULATE - SELECT DISTINCT io.* + SELECT DISTINCT io.*, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, FROM ( SELECT i.protocol_app_node_id input_pan_id, pa.protocol_app_id, o.protocol_app_node_id output_pan_id, @@ -12,7 +12,7 @@ FROM :SCHEMA.panextdbrls panExtDbRls INNER JOIN study.Input i on i.protocol_app_node_id = panExtDbRls.pan_id INNER JOIN study.ProtocolApp pa on i.protocol_app_id = pa.protocol_app_id - INNER JOIN study.Output o AND o.protocol_app_id = pa.protocol_app_id + INNER JOIN study.Output o o.protocol_app_id = pa.protocol_app_id INNER JOIN study.ProtocolAppNode in_pan on i.protocol_app_node_id = in_pan.protocol_app_node_id INNER JOIN study.ProtocolAppNode out_pan on o.protocol_app_node_id = out_pan.protocol_app_node_id LEFT JOIN sres.OntologyTerm out_type ON out_pan.type_id = out_type.ontology_term_id From 
33b8248cda983a056c5496134c4c2d3d2cfbc844 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 15:09:25 -0400 Subject: [PATCH 044/112] debug --- Model/lib/psql/webready/orgSpecific/PANIO.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql index ac61b072fa..f4824b6f27 100644 --- a/Model/lib/psql/webready/orgSpecific/PANIO.psql +++ b/Model/lib/psql/webready/orgSpecific/PANIO.psql @@ -1,5 +1,5 @@ :CREATE_AND_POPULATE - SELECT DISTINCT io.*, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date, + SELECT DISTINCT io.*, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM ( SELECT i.protocol_app_node_id input_pan_id, pa.protocol_app_id, o.protocol_app_node_id output_pan_id, From 9d49eb15a523e94645f1a4ed62461bd72f672447 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 15:10:52 -0400 Subject: [PATCH 045/112] debug --- .../webready/orgSpecific/GenomicSeqAttributes_ix.psql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql index 039749930e..54f8016119 100644 --- a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql @@ -1,11 +1,11 @@ - create unique pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id) + create unique index pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id) ; - create unique SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id) + create unique index SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id) ; - create unique SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id) + create unique index SeqAttr_naseqid 
ON :SCHEMA.GenomicSeqAttributes (na_sequence_id) ; - create unique SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id) + create unique index SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id) ; From 97f480cc89eff79398cc74e7dd74867fda7520c2 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 15:22:38 -0400 Subject: [PATCH 046/112] debug --- Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql | 4 ++-- Model/lib/psql/webready/orgSpecific/PANIO.psql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql b/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql index 8c5134b475..2d0531bdfa 100644 --- a/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GoTermSummary_ix.psql @@ -1,7 +1,7 @@ - create GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source) + create index GoTermSum_aaSeqId_idx ON :SCHEMA.GoTermSummary (aa_sequence_id, go_id, source) ; - create GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary + create index GoTermSum_plugin_ix ON :SCHEMA.GoTermSummary (ontology, gene_source_id, is_not, is_go_slim, go_id, go_term_name, evidence_code, evidence_category) ; diff --git a/Model/lib/psql/webready/orgSpecific/PANIO.psql b/Model/lib/psql/webready/orgSpecific/PANIO.psql index f4824b6f27..b9b918779f 100644 --- a/Model/lib/psql/webready/orgSpecific/PANIO.psql +++ b/Model/lib/psql/webready/orgSpecific/PANIO.psql @@ -12,7 +12,7 @@ FROM :SCHEMA.panextdbrls panExtDbRls INNER JOIN study.Input i on i.protocol_app_node_id = panExtDbRls.pan_id INNER JOIN study.ProtocolApp pa on i.protocol_app_id = pa.protocol_app_id - INNER JOIN study.Output o o.protocol_app_id = pa.protocol_app_id + INNER JOIN study.Output o on o.protocol_app_id = pa.protocol_app_id INNER JOIN study.ProtocolAppNode in_pan on i.protocol_app_node_id = in_pan.protocol_app_node_id INNER JOIN 
study.ProtocolAppNode out_pan on o.protocol_app_node_id = out_pan.protocol_app_node_id LEFT JOIN sres.OntologyTerm out_type ON out_pan.type_id = out_type.ontology_term_id From dc1f03886c72e2ad748ebc5bca634aba3eb01eb2 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 15:40:34 -0400 Subject: [PATCH 047/112] debug --- .../webready/orgSpecific/GenomicSeqAttributes_ix.psql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql index 54f8016119..de455dd7f4 100644 --- a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes_ix.psql @@ -1,11 +1,11 @@ - create unique index pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (lower(source_id), project_id) + create unique index pk_SeqAttr_ ON :SCHEMA.GenomicSeqAttributes (org_abbrev, lower(source_id), project_id) ; - create unique index SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (source_id) + create unique index SeqAttr_source_id ON :SCHEMA.GenomicSeqAttributes (org_abbrev, source_id) ; - create unique index SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (na_sequence_id) + create unique index SeqAttr_naseqid ON :SCHEMA.GenomicSeqAttributes (org_abbrev, na_sequence_id) ; - create unique index SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (taxon_id, source_id) + create unique index SeqAttr_taxsrc_id ON :SCHEMA.GenomicSeqAttributes (org_abbrev, taxon_id, source_id) ; From f03dad2e45bb812e634374ade439074362613e4d Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 15:43:06 -0400 Subject: [PATCH 048/112] debug --- Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql index 
eed35a9960..3cad24bdcc 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAttributes_ix.psql @@ -1,6 +1,6 @@ - create unique index EstAttr_source_id ON :SCHEMA.EstAttributes (source_id) + create unique index EstAttr_source_id ON :SCHEMA.EstAttributes (org_abbrev, source_id) ; From 9cbcbc9ec36c30e0825792a3c46b2cdeb4b519a2 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 15:54:21 -0400 Subject: [PATCH 049/112] debug --- Model/lib/psql/webready/orgSpecific/EstAttributes.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql index c777a3a288..5550ae5d67 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql @@ -36,7 +36,7 @@ WHERE is_best_alignment = 1 AND ba.query_external_db_release_id = r.external_database_release_id AND r.external_database_id = d.external_database_id - AND ed.name = ds.name + AND d.name = ds.name AND ds.taxon_id = o.taxon_id AND o.is_reference = 1 AND o.taxon_id = :TAXON_ID From affcf6d652cb24a15259da1d0947ba965737f818 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 16:10:42 -0400 Subject: [PATCH 050/112] debug indexes --- .../orgSpecific/ChrCopyNumbers_ix.psql | 4 ++-- .../orgSpecific/GeneAttributes_ix.psql | 24 +++++++++---------- .../webready/orgSpecific/GeneGoTable_ix.psql | 2 +- .../orgSpecific/GeneIntJuncStats_ix.psql | 2 +- .../orgSpecific/GeneIntronJunction_ix.psql | 6 ++--- .../orgSpecific/GeneModelDump_ix.psql | 2 +- .../GenomicSequenceSequence_ix.psql | 2 +- .../orgSpecific/NameMappingGIJ_ix.psql | 2 +- .../psql/webready/orgSpecific/PANIO_ix.psql | 8 +++---- .../orgSpecific/PathwaysGeneTable_ix.psql | 2 +- .../orgSpecific/TranscriptAttributes_ix.psql | 22 ++++++++--------- 11 files changed, 38 insertions(+), 38 deletions(-) diff 
--git a/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql index 4f989590a9..c5a7eff545 100644 --- a/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/ChrCopyNumbers_ix.psql @@ -1,9 +1,9 @@ - CREATE ChrCN_ix + CREATE index ChrCN_ix ON :SCHEMA.ChrCopyNumbers (input_pan_id, na_sequence_id) ; - CREATE ChrCN_output + CREATE index ChrCN_output ON :SCHEMA.ChrCopyNumbers (output_pan_id) ; diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql index 9f45a0abad..2e66f521e6 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes_ix.psql @@ -1,47 +1,47 @@ - CREATE UNIQUE GeneAttr_srcPrj - ON :SCHEMA.GeneAttributes (source_id) + CREATE UNIQUE INDEX GeneAttr_srcPrj + ON :SCHEMA.GeneAttributes (org_abbrev, source_id) ; - CREATE GeneAttr_exon_ix + CREATE INDEX GeneAttr_exon_ix ON :SCHEMA.GeneAttributes (exon_count, source_id, project_id) ; - CREATE GeneAttr_loc_ix + CREATE INDEX GeneAttr_loc_ix ON :SCHEMA.GeneAttributes (na_sequence_id, start_min, end_max, is_reversed, na_feature_id, is_deprecated) ; - CREATE GeneAttr_feat_ix + CREATE INDEX GeneAttr_feat_ix ON :SCHEMA.GeneAttributes (na_feature_id, na_sequence_id, start_min, end_max, is_reversed) ; - CREATE GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes ( + CREATE INDEX GeneAttr_orthoname_ix ON :SCHEMA.GeneAttributes ( orthomcl_name, source_id, taxon_id, gene_type, na_feature_id, na_sequence_id, start_min, end_max, organism, species, product, project_id ) ; - CREATE GeneAttr_ortholog_ix + CREATE INDEX GeneAttr_ortholog_ix ON :SCHEMA.GeneAttributes (source_id, na_sequence_id, start_min, end_max, orthomcl_name, na_feature_id) ; - CREATE GeneAttr_orgsrc_ix + CREATE INDEX GeneAttr_orgsrc_ix ON :SCHEMA.GeneAttributes (organism, source_id, na_sequence_id, start_min, 
end_max) ; - CREATE GeneAttr_prjsrc_ix + CREATE INDEX GeneAttr_prjsrc_ix ON :SCHEMA.GeneAttributes (project_id, organism, source_id, coalesce(IS_DEPRECATED,0)) ; - CREATE GeneAttr_txid_ix + CREATE INDEX GeneAttr_txid_ix ON :SCHEMA.GeneAttributes (taxon_id, source_id, gene_type, na_feature_id, project_id) ; - CREATE GeneAttr_ids_ix + CREATE INDEX GeneAttr_ids_ix ON :SCHEMA.GeneAttributes (na_feature_id, source_id, project_id) ; - CREATE GeneAttr_loc_intjunc_ix + CREATE INDEX GeneAttr_loc_intjunc_ix ON :SCHEMA.GeneAttributes (NA_SEQUENCE_ID, START_MIN, IS_REVERSED, END_MAX) ; diff --git a/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql index a065517030..5e34d52885 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneGoTable_ix.psql @@ -1,4 +1,4 @@ - create ggtab_ix ON :SCHEMA.GeneGoTable + create index ggtab_ix ON :SCHEMA.GeneGoTable (source_id, project_id, go_id, transcript_ids, is_not, go_term_name, ontology, source, evidence_code, reference, evidence_code_parameter, sort_key) ; diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql index 89389667a0..89d3004344 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql @@ -1,3 +1,3 @@ - create GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id) + create index GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id) ; diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql index 3c0cff5069..ad0f7593c5 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql @@ -1,10 +1,10 @@ - create gijnew_loc_ix on :SCHEMA.GeneIntronJunction 
(na_sequence_id,segment_start,segment_end,is_reversed) + create index gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed) ; - create gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id) + create index gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id) ; - create gijnew_txnloc_ix + create index gijnew_txnloc_ix on :SCHEMA.GeneIntronJunction (taxon_id, na_sequence_id, segment_start, segment_end, is_reversed, total_unique, total_isrpm, annotated_intron) diff --git a/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql index e4388deb01..2a73000f95 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneModelDump_ix.psql @@ -1,4 +1,4 @@ - create gmd_ix + create index gmd_ix on :SCHEMA.GeneModelDump (source_id, project_id, sequence_id, gm_start, gm_end, is_reversed, type, transcript_ids) ; diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql index 497278e368..dc3711740d 100644 --- a/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GenomicSequenceSequence_ix.psql @@ -1,3 +1,3 @@ - create GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id) + create index GenomicSeq_ix on :SCHEMA.GenomicSequenceSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql index 4245fce828..c386e6ba00 100644 --- a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql @@ -1,3 +1,3 @@ - create namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id) + create index namemappinggij_ix on :SCHEMA.NameMappingGIJ 
(junctions_pan_id,exp_pan_id) ; diff --git a/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql b/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql index 9bbb34ec21..f809a0b722 100644 --- a/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/PANIO_ix.psql @@ -1,4 +1,4 @@ - create painio2_iix on :SCHEMA.PANIO + create index painio2_iix on :SCHEMA.PANIO (input_pan_id, output_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id) @@ -6,7 +6,7 @@ - create painio2_oix on :SCHEMA.PANIO + create index painio2_oix on :SCHEMA.PANIO (output_pan_id, input_pan_id, protocol_app_id, input_pan_type_source_id, output_pan_type_source_id) @@ -14,7 +14,7 @@ - create painio2_otypeix on :SCHEMA.PANIO + create index painio2_otypeix on :SCHEMA.PANIO (output_pan_type_source_id, input_pan_type_source_id, output_pan_id, input_pan_id, protocol_app_id) @@ -22,7 +22,7 @@ - create painio2_itypeix on :SCHEMA.PANIO + create index painio2_itypeix on :SCHEMA.PANIO (input_pan_type_source_id, output_pan_type_source_id, input_pan_id, output_pan_id, protocol_app_id) diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql index 40750a7f84..f69349df0c 100644 --- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable_ix.psql @@ -1,4 +1,4 @@ - create pgt_ix on :SCHEMA.PathwaysGeneTable + create index pgt_ix on :SCHEMA.PathwaysGeneTable (gene_source_id, project_id, pathway_source_id, pathway_name, reactions, enzyme, expasy_url, pathway_source, exact_match) diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql index dd1670b4a3..d0eee715fd 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes_ix.psql @@ -1,39 +1,39 @@ CREATE 
UNIQUE INDEX TranscriptAttr_sourceId - ON :SCHEMA.TranscriptAttributes (source_id) + ON :SCHEMA.TranscriptAttributes (org_abbrev, source_id) ; CREATE UNIQUE INDEX TranscriptAttr_srcPrj - ON :SCHEMA.TranscriptAttributes (source_id, gene_source_id, project_id) + ON :SCHEMA.TranscriptAttributes (org_abbrev, source_id, gene_source_id, project_id) ; CREATE UNIQUE INDEX TranscriptAttr_genesrc - ON :SCHEMA.TranscriptAttributes (gene_source_id, source_id, project_id) + ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_source_id, source_id, project_id) ; CREATE UNIQUE INDEX TranscriptAttr_exon_ix - ON :SCHEMA.TranscriptAttributes (gene_exon_count, source_id, gene_source_id, project_id) + ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_exon_count, source_id, gene_source_id, project_id) ; CREATE UNIQUE INDEX TranscriptAttr_loc_ix ON :SCHEMA.TranscriptAttributes - (na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id, + (org_abbrev, na_sequence_id, gene_start_min, gene_end_max, is_reversed, na_feature_id, is_deprecated, source_id, gene_source_id, project_id) ; CREATE UNIQUE INDEX TranscriptAttr_feat_ix - ON :SCHEMA.TranscriptAttributes (na_feature_id, source_id, gene_source_id, project_id) + ON :SCHEMA.TranscriptAttributes (org_abbrev, na_feature_id, source_id, gene_source_id, project_id) ; CREATE UNIQUE INDEX TranscriptAttr_geneid_ix - ON :SCHEMA.TranscriptAttributes (gene_id, source_id, gene_source_id, project_id) + ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_id, source_id, gene_source_id, project_id) ; CREATE UNIQUE INDEX TransAttr_orthoname_ix - ON :SCHEMA.TranscriptAttributes (orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id) + ON :SCHEMA.TranscriptAttributes (org_abbrev, orthomcl_name, source_id, taxon_id, gene_type, organism, gene_source_id, project_id) ; CREATE UNIQUE INDEX TransAttr_molwt_ix - ON :SCHEMA.TranscriptAttributes (taxon_id, molecular_weight, source_id, gene_source_id, project_id) + ON 
:SCHEMA.TranscriptAttributes (org_abbrev, taxon_id, molecular_weight, source_id, gene_source_id, project_id) ; CREATE INDEX TransAttr_ortholog_ix @@ -55,13 +55,13 @@ CREATE UNIQUE INDEX TrnscrptAttr_geneinfo ON :SCHEMA.TranscriptAttributes - (gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id, + (org_abbrev, gene_source_id, project_id, source_id, na_feature_id, spliced_na_sequence_id, protein_source_id, na_sequence_id, length, protein_length, five_prime_utr_length, three_prime_utr_length) ; CREATE UNIQUE INDEX TranscriptAttr_genenaf - ON :SCHEMA.TranscriptAttributes (gene_na_feature_id, gene_source_id, source_id, project_id) + ON :SCHEMA.TranscriptAttributes (org_abbrev, gene_na_feature_id, gene_source_id, source_id, project_id) ; CREATE INDEX TransAttr_locsIds_ix From 73a0c35c454bb263bf433257fb4fab0e779f41f7 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 16:14:04 -0400 Subject: [PATCH 051/112] debug indexes --- Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql index a5cfad9f82..4968f458a8 100644 --- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql @@ -80,7 +80,7 @@ - create index :SCHEMA.:ORG_ABBREVProteinGoAttr_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id) + create index ProteinGoAttr_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id) ; From 6b97ff9875de545c034c99ddf37809762234a4b0 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 16:38:57 -0400 Subject: [PATCH 052/112] debug indexes --- Model/lib/psql/webready/orgSpecific/EstAttributes.psql | 6 +++--- Model/lib/psql/webready/orgSpecific/EstSequence.psql | 2 +- Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql | 
2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql index 5550ae5d67..110b43a673 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql @@ -31,14 +31,14 @@ (select query_na_sequence_id,max(ct) as best_alignment_count from ( SELECT query_na_sequence_id, COUNT(*) AS ct - FROM dots.BlatAlignment ba, apidb.datasource, apidb.organism o, + FROM dots.BlatAlignment ba, apidb.datasource ds, apidb.organism o, sres.externaldatabase d, sres.externaldatabaserelease r WHERE is_best_alignment = 1 AND ba.query_external_db_release_id = r.external_database_release_id AND r.external_database_id = d.external_database_id AND d.name = ds.name AND ds.taxon_id = o.taxon_id - AND o.is_reference = 1 + AND o.is_reference_strain = 1 AND o.taxon_id = :TAXON_ID GROUP BY target_external_db_release_id,query_na_sequence_id) t group by query_na_sequence_id @@ -53,7 +53,7 @@ AND ens.sequence_ontology_id = oterm.ontology_term_id AND ed.name = ds.name and ds.taxon_id = o.taxon_id - and o.is_reference = 1 + and o.is_reference_strain = 1 and o.taxon_id = :TAXON_ID AND oterm.name = 'EST' :DECLARE_PARTITION; diff --git a/Model/lib/psql/webready/orgSpecific/EstSequence.psql b/Model/lib/psql/webready/orgSpecific/EstSequence.psql index e31eeca39c..58bc1dfd95 100644 --- a/Model/lib/psql/webready/orgSpecific/EstSequence.psql +++ b/Model/lib/psql/webready/orgSpecific/EstSequence.psql @@ -17,5 +17,5 @@ AND d.name = ds.name AND ds.taxon_id = o.taxon_id AND ds.taxon_id = :TAXON_ID - AND o.is_reference = 1 + AND o.is_reference_strain = 1 :DECLARE_PARTITION; diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql index 4968f458a8..52bb9d668e 100644 --- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql +++ 
b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql @@ -158,7 +158,7 @@ row_number() over (partition by t.source_id order by tas.length desc) as rank_in_transcript, uniprot.uniprot_ids FROM - dots.Transcript t, + dots.Transcript t INNER JOIN dots.GeneFeature gf ON gf.na_feature_id = t.parent_id INNER JOIN dots.nasequence nas ON gf.na_sequence_id = nas.na_sequence_id AND nas.taxon_id = :TAXON_ID INNER JOIN dots.TranslatedAaFeature taf ON t.na_feature_id = taf.na_feature_id From 61b40b1ed43e8a0e383d13487601a50fa3b9dbe6 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 16:59:57 -0400 Subject: [PATCH 053/112] debug --- Model/lib/psql/webready/orgSpecific/EstAttributes.psql | 4 ++-- Model/lib/psql/webready/orgSpecific/GeneId_ix.psql | 10 +++++----- .../psql/webready/orgSpecific/ProteinAttributes.psql | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql index 110b43a673..4564bd2631 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAttributes.psql @@ -25,8 +25,8 @@ l.library_id, replace(l.dbest_name, '''', '-') as library_dbest_name FROM dots.Est e, dots.Library l, sres.Taxon, sres.OntologyTerm oterm, sres.TaxonName tn, sres.ExternalDatabase ed, - sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens, - apidb.datasource ds, apidb.organism o + apidb.datasource ds, apidb.organism o, + sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens LEFT JOIN (select query_na_sequence_id,max(ct) as best_alignment_count from ( diff --git a/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql index 2f2fd86202..5e3f12dce7 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneId_ix.psql @@ -1,15 +1,15 @@ - CREATE INDEX :SCHEMA.GeneId_gene_idx 
ON :SCHEMA.GeneId (gene, id) + CREATE INDEX GeneId_gene_idx ON :SCHEMA.GeneId (gene, id) ; - CREATE INDEX :SCHEMA.GeneId_id_idx ON :SCHEMA.GeneId (id, gene) + CREATE INDEX GeneId_id_idx ON :SCHEMA.GeneId (id, gene) ; - CREATE INDEX :SCHEMA.GeneId_uniqid_idx ON :SCHEMA.GeneId (unique_mapping, id, gene) + CREATE INDEX GeneId_uniqid_idx ON :SCHEMA.GeneId (unique_mapping, id, gene) ; - CREATE INDEX :SCHEMA.GeneId_lowid_idx ON :SCHEMA.GeneId (lower(id), gene) + CREATE INDEX GeneId_lowid_idx ON :SCHEMA.GeneId (lower(id), gene) ; - CREATE INDEX :SCHEMA.GeneId_uniqlowid_idx ON :SCHEMA.GeneId (unique_mapping, lower(id), gene) + CREATE INDEX GeneId_uniqlowid_idx ON :SCHEMA.GeneId (unique_mapping, lower(id), gene) ; diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql index 52bb9d668e..1f0e0f78e9 100644 --- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql @@ -85,9 +85,9 @@ ; - DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp; + DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp; - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp AS + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp AS SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers FROM (SELECT DISTINCT asec.aa_sequence_id, ec.ec_number || ' (' || ec.description || ')' AS ec_number @@ -101,10 +101,10 @@ ; - DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp; + DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp; --TODO: these rows will not exist in org specific land - -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp AS + -- CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp AS -- SELECT aa_sequence_id, SUBSTR(string_agg(ec_number, ';' order by ec_number),1, 300) AS ec_numbers_derived -- FROM (SELECT DISTINCT 
asec.aa_sequence_id, -- ec.ec_number || ' (' || ec.description || ')' AS ec_number From 6dacc1cf6167d3d7739c6cc01ef00abf7e9b7d11 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 17:03:50 -0400 Subject: [PATCH 054/112] debug --- .../webready/orgSpecific/NameMappingGIJ.psql | 309 +++++++++++++++++- 1 file changed, 307 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql index 678512b373..8ac689abd8 100644 --- a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql +++ b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql @@ -4,7 +4,7 @@ WITH ij AS ( SELECT pj.output_pan_id as junctions_pan_id, p.output_pan_id as expression_pan_id, avg(nafe.value) as avg_value,pan.name as exp_name, regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name - FROM panio p, panio pj, results.nafeatureexpression nafe, study.protocolappnode pan + FROM :SCHEMA.:ORG_ABBREVpanio p, :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) AND pj.input_pan_id = p.input_pan_id AND p.output_pan_id = pan.protocol_app_node_id @@ -65,7 +65,312 @@ cb.value::NUMERIC as number_mapped_reads, cc.value::NUMERIC as avg_mapping_coverage FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, - PANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb, + :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan + WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) + AND pj.input_pan_id = p.input_pan_id + AND p.output_pan_id = pan.protocol_app_node_id + AND pan.name like '%tpm - unique%' + AND p.output_pan_id = nafe.protocol_app_node_id + GROUP BY pj.output_pan_id, p.output_pan_id, pan.name + ORDER BY pj.output_pan_id + ) , stats AS ( + SELECT protocol_app_node_id, 
'total' as type, count(*) as total_junctions, + sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier + FROM apidb.IntronJunction + WHERE unique_reads >= 1 + GROUP BY protocol_app_node_id + ), part AS ( + SELECT + ij.junctions_pan_id, ij.avg_value, stats.multiplier + , max(ij.expression_pan_id) OVER w as max_exp_pan_id + , max(ij.sample_name) OVER w as max_sample_Name + , max(ij.exp_name) OVER w as max_exp_name + FROM ij, stats + WHERE ij.junctions_pan_id = stats.protocol_app_node_id + WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) + ) + SELECT DISTINCT * FROM ( + SELECT junctions_pan_id + , first_value(max_exp_pan_id) OVER w1 as exp_pan_id + , first_value(max_sample_name) OVER w1 as sample_name + , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands + , multiplier + FROM part + WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) + ) t + ORDER BY junctions_pan_id + + ; + + + + create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) + + ; + + drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; + + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( + junctions_pan_id, + read_length, + mapped_reads, + avg_mapping_coverage, + num_replicates + ) AS + SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, + round(avg(number_mapped_reads),1) as mapped_reads, + round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) + as avg_mapping_coverage, + count(*) as num_replicates + FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, + cb.value::NUMERIC as number_mapped_reads, + cc.value::NUMERIC as avg_mapping_coverage + FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, + :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan + WHERE pj.output_pan_id in 
(select distinct protocol_app_node_id from apidb.intronjunction) + AND pj.input_pan_id = p.input_pan_id + AND p.output_pan_id = pan.protocol_app_node_id + AND pan.name like '%tpm - unique%' + AND p.output_pan_id = nafe.protocol_app_node_id + GROUP BY pj.output_pan_id, p.output_pan_id, pan.name + ORDER BY pj.output_pan_id + ) , stats AS ( + SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions, + sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier + FROM apidb.IntronJunction + WHERE unique_reads >= 1 + GROUP BY protocol_app_node_id + ), part AS ( + SELECT + ij.junctions_pan_id, ij.avg_value, stats.multiplier + , max(ij.expression_pan_id) OVER w as max_exp_pan_id + , max(ij.sample_name) OVER w as max_sample_Name + , max(ij.exp_name) OVER w as max_exp_name + FROM ij, stats + WHERE ij.junctions_pan_id = stats.protocol_app_node_id + WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) + ) + SELECT DISTINCT * FROM ( + SELECT junctions_pan_id + , first_value(max_exp_pan_id) OVER w1 as exp_pan_id + , first_value(max_sample_name) OVER w1 as sample_name + , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands + , multiplier + FROM part + WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) + ) t + ORDER BY junctions_pan_id + + ; + + + + create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) + + ; + + drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; + + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( + junctions_pan_id, + read_length, + mapped_reads, + avg_mapping_coverage, + num_replicates + ) AS + SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, + round(avg(number_mapped_reads),1) as mapped_reads, + round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) + as avg_mapping_coverage, + count(*) as 
num_replicates + FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, + cb.value::NUMERIC as number_mapped_reads, + cc.value::NUMERIC as avg_mapping_coverage + FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, + :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan + WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) + AND pj.input_pan_id = p.input_pan_id + AND p.output_pan_id = pan.protocol_app_node_id + AND pan.name like '%tpm - unique%' + AND p.output_pan_id = nafe.protocol_app_node_id + GROUP BY pj.output_pan_id, p.output_pan_id, pan.name + ORDER BY pj.output_pan_id + ) , stats AS ( + SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions, + sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier + FROM apidb.IntronJunction + WHERE unique_reads >= 1 + GROUP BY protocol_app_node_id + ), part AS ( + SELECT + ij.junctions_pan_id, ij.avg_value, stats.multiplier + , max(ij.expression_pan_id) OVER w as max_exp_pan_id + , max(ij.sample_name) OVER w as max_sample_Name + , max(ij.exp_name) OVER w as max_exp_name + FROM ij, stats + WHERE ij.junctions_pan_id = stats.protocol_app_node_id + WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) + ) + SELECT DISTINCT * FROM ( + SELECT junctions_pan_id + , first_value(max_exp_pan_id) OVER w1 as exp_pan_id + , first_value(max_sample_name) OVER w1 as sample_name + , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands + , multiplier + FROM part + WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) + ) t + ORDER BY junctions_pan_id + + ; + + + + create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) + + ; + + drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; + + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( + 
junctions_pan_id, + read_length, + mapped_reads, + avg_mapping_coverage, + num_replicates + ) AS + SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, + round(avg(number_mapped_reads),1) as mapped_reads, + round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) + as avg_mapping_coverage, + count(*) as num_replicates + FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, + cb.value::NUMERIC as number_mapped_reads, + cc.value::NUMERIC as avg_mapping_coverage + FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, + :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan + WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) + AND pj.input_pan_id = p.input_pan_id + AND p.output_pan_id = pan.protocol_app_node_id + AND pan.name like '%tpm - unique%' + AND p.output_pan_id = nafe.protocol_app_node_id + GROUP BY pj.output_pan_id, p.output_pan_id, pan.name + ORDER BY pj.output_pan_id + ) , stats AS ( + SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions, + sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier + FROM apidb.IntronJunction + WHERE unique_reads >= 1 + GROUP BY protocol_app_node_id + ), part AS ( + SELECT + ij.junctions_pan_id, ij.avg_value, stats.multiplier + , max(ij.expression_pan_id) OVER w as max_exp_pan_id + , max(ij.sample_name) OVER w as max_sample_Name + , max(ij.exp_name) OVER w as max_exp_name + FROM ij, stats + WHERE ij.junctions_pan_id = stats.protocol_app_node_id + WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) + ) + SELECT DISTINCT * FROM ( + SELECT junctions_pan_id + , first_value(max_exp_pan_id) OVER w1 as exp_pan_id + , first_value(max_sample_name) OVER w1 as sample_name + , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands + 
, multiplier + FROM part + WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) + ) t + ORDER BY junctions_pan_id + + ; + + + + create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) + + ; + + drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; + + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( + junctions_pan_id, + read_length, + mapped_reads, + avg_mapping_coverage, + num_replicates + ) AS + SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, + round(avg(number_mapped_reads),1) as mapped_reads, + round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) + as avg_mapping_coverage, + count(*) as num_replicates + FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, + cb.value::NUMERIC as number_mapped_reads, + cc.value::NUMERIC as avg_mapping_coverage + FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, + :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan + WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) + AND pj.input_pan_id = p.input_pan_id + AND p.output_pan_id = pan.protocol_app_node_id + AND pan.name like '%tpm - unique%' + AND p.output_pan_id = nafe.protocol_app_node_id + GROUP BY pj.output_pan_id, p.output_pan_id, pan.name + ORDER BY pj.output_pan_id + ) , stats AS ( + SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions, + sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier + FROM apidb.IntronJunction + WHERE unique_reads >= 1 + GROUP BY protocol_app_node_id + ), part AS ( + SELECT + ij.junctions_pan_id, ij.avg_value, stats.multiplier + , max(ij.expression_pan_id) OVER w as max_exp_pan_id + , max(ij.sample_name) OVER w as max_sample_Name + , max(ij.exp_name) OVER w as max_exp_name + FROM ij, stats + WHERE ij.junctions_pan_id = stats.protocol_app_node_id 
+ WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) + ) + SELECT DISTINCT * FROM ( + SELECT junctions_pan_id + , first_value(max_exp_pan_id) OVER w1 as exp_pan_id + , first_value(max_sample_name) OVER w1 as sample_name + , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands + , multiplier + FROM part + WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) + ) t + ORDER BY junctions_pan_id + + ; + + + + create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) + + ; + + drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; + + CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( + junctions_pan_id, + read_length, + mapped_reads, + avg_mapping_coverage, + num_replicates + ) AS + SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, + round(avg(number_mapped_reads),1) as mapped_reads, + round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) + as avg_mapping_coverage, + count(*) as num_replicates + FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, + cb.value::NUMERIC as number_mapped_reads, + cc.value::NUMERIC as avg_mapping_coverage + FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, + :SCHEMA.:ORG_ABBREVPANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb, STUDY.CHARACTERISTIC cc, sres.ontologyterm otc WHERE je.junctions_pan_id = ioa.output_pan_id AND ioa.input_pan_id = ca.protocol_app_node_id From 3235c0c08e86322745b3cc1334f20b5e8db0d3e9 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 17:10:46 -0400 Subject: [PATCH 055/112] debug --- Model/lib/psql/webready/orgSpecific/GeneId.psql | 4 ++-- Model/lib/psql/webready/orgSpecific/ProteinSequence.psql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/GeneId.psql 
b/Model/lib/psql/webready/orgSpecific/GeneId.psql index 75b1bfdab5..315917f872 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneId.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneId.psql @@ -13,7 +13,7 @@ create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureTmp as ) ; -create index :SCHEMA.:ORG_ABBREVGeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id) +create index GeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id) ; @@ -261,7 +261,7 @@ create index :SCHEMA.:ORG_ABBREVGeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_AB - CREATE UNIQUE INDEX :ORG_ABBREV_gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id) + CREATE UNIQUE INDEX gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id) ; diff --git a/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql b/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql index 68a8664412..0d2f582182 100644 --- a/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql +++ b/Model/lib/psql/webready/orgSpecific/ProteinSequence.psql @@ -1,7 +1,7 @@ :CREATE_AND_POPULATE WITH pAttr AS ( SELECT distinct source_id, aa_sequence_id - FROM ProteinAttributes where org_abbrev = ':ORG_ABBREV') + FROM :SCHEMA.ProteinAttributes where org_abbrev = ':ORG_ABBREV') SELECT pa.source_id, tas.sequence, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM pAttr pa, dots.TranslatedAaSequence tas From 1b3de8eb4adeacebeef3361328e9258e404ad13b Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 17:20:00 -0400 Subject: [PATCH 056/112] debug --- .../webready/orgSpecific/NameMappingGIJ.psql | 306 +----------------- 1 file changed, 7 insertions(+), 299 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql index 8ac689abd8..1dbca91c94 100644 --- a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql +++ 
b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql @@ -4,12 +4,14 @@ WITH ij AS ( SELECT pj.output_pan_id as junctions_pan_id, p.output_pan_id as expression_pan_id, avg(nafe.value) as avg_value,pan.name as exp_name, regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name - FROM :SCHEMA.:ORG_ABBREVpanio p, :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan + FROM :SCHEMA.panio p, :SCHEMA.panio pj, results.nafeatureexpression nafe, study.protocolappnode pan WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) AND pj.input_pan_id = p.input_pan_id AND p.output_pan_id = pan.protocol_app_node_id AND pan.name like '%tpm - unique%' AND p.output_pan_id = nafe.protocol_app_node_id + and p.org_abbrev = ':ORG_ABBREV' + and pj.org_abbrev = ':ORG_ABBREV' GROUP BY pj.output_pan_id, p.output_pan_id, pan.name ORDER BY pj.output_pan_id ) , stats AS ( @@ -43,7 +45,7 @@ - create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) + create index junexpgijtmp_ix on :SCHEMA.:ORG_ABBREVJunExpGIJtmp(junctions_pan_id,exp_pan_id) ; @@ -64,13 +66,14 @@ FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, cb.value::NUMERIC as number_mapped_reads, cc.value::NUMERIC as avg_mapping_coverage - FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, - :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan + FROM :SCHEMA.:ORG_ABBREVjunexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, + panio pj, results.nafeatureexpression nafe, study.protocolappnode pan WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) AND pj.input_pan_id = p.input_pan_id AND p.output_pan_id = pan.protocol_app_node_id AND pan.name like '%tpm - unique%' AND p.output_pan_id = nafe.protocol_app_node_id + and pj.org_abbrev = ':ORG_ABBREV' GROUP BY pj.output_pan_id, p.output_pan_id, pan.name ORDER BY 
pj.output_pan_id ) , stats AS ( @@ -103,301 +106,6 @@ ; - - create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) - - ; - - drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( - junctions_pan_id, - read_length, - mapped_reads, - avg_mapping_coverage, - num_replicates - ) AS - SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, - round(avg(number_mapped_reads),1) as mapped_reads, - round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) - as avg_mapping_coverage, - count(*) as num_replicates - FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, - cb.value::NUMERIC as number_mapped_reads, - cc.value::NUMERIC as avg_mapping_coverage - FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, - :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan - WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) - AND pj.input_pan_id = p.input_pan_id - AND p.output_pan_id = pan.protocol_app_node_id - AND pan.name like '%tpm - unique%' - AND p.output_pan_id = nafe.protocol_app_node_id - GROUP BY pj.output_pan_id, p.output_pan_id, pan.name - ORDER BY pj.output_pan_id - ) , stats AS ( - SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions, - sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier - FROM apidb.IntronJunction - WHERE unique_reads >= 1 - GROUP BY protocol_app_node_id - ), part AS ( - SELECT - ij.junctions_pan_id, ij.avg_value, stats.multiplier - , max(ij.expression_pan_id) OVER w as max_exp_pan_id - , max(ij.sample_name) OVER w as max_sample_Name - , max(ij.exp_name) OVER w as max_exp_name - FROM ij, stats - WHERE ij.junctions_pan_id = stats.protocol_app_node_id - WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) - ) - SELECT 
DISTINCT * FROM ( - SELECT junctions_pan_id - , first_value(max_exp_pan_id) OVER w1 as exp_pan_id - , first_value(max_sample_name) OVER w1 as sample_name - , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands - , multiplier - FROM part - WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) - ) t - ORDER BY junctions_pan_id - - ; - - - - create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) - - ; - - drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( - junctions_pan_id, - read_length, - mapped_reads, - avg_mapping_coverage, - num_replicates - ) AS - SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, - round(avg(number_mapped_reads),1) as mapped_reads, - round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) - as avg_mapping_coverage, - count(*) as num_replicates - FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, - cb.value::NUMERIC as number_mapped_reads, - cc.value::NUMERIC as avg_mapping_coverage - FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, - :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan - WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) - AND pj.input_pan_id = p.input_pan_id - AND p.output_pan_id = pan.protocol_app_node_id - AND pan.name like '%tpm - unique%' - AND p.output_pan_id = nafe.protocol_app_node_id - GROUP BY pj.output_pan_id, p.output_pan_id, pan.name - ORDER BY pj.output_pan_id - ) , stats AS ( - SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions, - sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier - FROM apidb.IntronJunction - WHERE unique_reads >= 1 - GROUP BY protocol_app_node_id - ), part AS ( - SELECT - 
ij.junctions_pan_id, ij.avg_value, stats.multiplier - , max(ij.expression_pan_id) OVER w as max_exp_pan_id - , max(ij.sample_name) OVER w as max_sample_Name - , max(ij.exp_name) OVER w as max_exp_name - FROM ij, stats - WHERE ij.junctions_pan_id = stats.protocol_app_node_id - WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) - ) - SELECT DISTINCT * FROM ( - SELECT junctions_pan_id - , first_value(max_exp_pan_id) OVER w1 as exp_pan_id - , first_value(max_sample_name) OVER w1 as sample_name - , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands - , multiplier - FROM part - WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) - ) t - ORDER BY junctions_pan_id - - ; - - - - create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) - - ; - - drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( - junctions_pan_id, - read_length, - mapped_reads, - avg_mapping_coverage, - num_replicates - ) AS - SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, - round(avg(number_mapped_reads),1) as mapped_reads, - round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) - as avg_mapping_coverage, - count(*) as num_replicates - FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, - cb.value::NUMERIC as number_mapped_reads, - cc.value::NUMERIC as avg_mapping_coverage - FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, - :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan - WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) - AND pj.input_pan_id = p.input_pan_id - AND p.output_pan_id = pan.protocol_app_node_id - AND pan.name like '%tpm - unique%' - AND p.output_pan_id = nafe.protocol_app_node_id - 
GROUP BY pj.output_pan_id, p.output_pan_id, pan.name - ORDER BY pj.output_pan_id - ) , stats AS ( - SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions, - sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier - FROM apidb.IntronJunction - WHERE unique_reads >= 1 - GROUP BY protocol_app_node_id - ), part AS ( - SELECT - ij.junctions_pan_id, ij.avg_value, stats.multiplier - , max(ij.expression_pan_id) OVER w as max_exp_pan_id - , max(ij.sample_name) OVER w as max_sample_Name - , max(ij.exp_name) OVER w as max_exp_name - FROM ij, stats - WHERE ij.junctions_pan_id = stats.protocol_app_node_id - WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) - ) - SELECT DISTINCT * FROM ( - SELECT junctions_pan_id - , first_value(max_exp_pan_id) OVER w1 as exp_pan_id - , first_value(max_sample_name) OVER w1 as sample_name - , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands - , multiplier - FROM part - WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) - ) t - ORDER BY junctions_pan_id - - ; - - - - create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) - - ; - - drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( - junctions_pan_id, - read_length, - mapped_reads, - avg_mapping_coverage, - num_replicates - ) AS - SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, - round(avg(number_mapped_reads),1) as mapped_reads, - round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) - as avg_mapping_coverage, - count(*) as num_replicates - FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, - cb.value::NUMERIC as number_mapped_reads, - cc.value::NUMERIC as avg_mapping_coverage - FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, 
sres.ontologyterm ota, - :SCHEMA.:ORG_ABBREVpanio pj, results.nafeatureexpression nafe, study.protocolappnode pan - WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) - AND pj.input_pan_id = p.input_pan_id - AND p.output_pan_id = pan.protocol_app_node_id - AND pan.name like '%tpm - unique%' - AND p.output_pan_id = nafe.protocol_app_node_id - GROUP BY pj.output_pan_id, p.output_pan_id, pan.name - ORDER BY pj.output_pan_id - ) , stats AS ( - SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions, - sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier - FROM apidb.IntronJunction - WHERE unique_reads >= 1 - GROUP BY protocol_app_node_id - ), part AS ( - SELECT - ij.junctions_pan_id, ij.avg_value, stats.multiplier - , max(ij.expression_pan_id) OVER w as max_exp_pan_id - , max(ij.sample_name) OVER w as max_sample_Name - , max(ij.exp_name) OVER w as max_exp_name - FROM ij, stats - WHERE ij.junctions_pan_id = stats.protocol_app_node_id - WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) - ) - SELECT DISTINCT * FROM ( - SELECT junctions_pan_id - , first_value(max_exp_pan_id) OVER w1 as exp_pan_id - , first_value(max_sample_name) OVER w1 as sample_name - , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands - , multiplier - FROM part - WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) - ) t - ORDER BY junctions_pan_id - - ; - - - - create index junexpgijtmp_ix on JunExpGIJtmp(junctions_pan_id,exp_pan_id) - - ; - - drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( - junctions_pan_id, - read_length, - mapped_reads, - avg_mapping_coverage, - num_replicates - ) AS - SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, - round(avg(number_mapped_reads),1) as mapped_reads, - 
round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) - as avg_mapping_coverage, - count(*) as num_replicates - FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, - cb.value::NUMERIC as number_mapped_reads, - cc.value::NUMERIC as avg_mapping_coverage - FROM junexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, - :SCHEMA.:ORG_ABBREVPANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb, - STUDY.CHARACTERISTIC cc, sres.ontologyterm otc - WHERE je.junctions_pan_id = ioa.output_pan_id - AND ioa.input_pan_id = ca.protocol_app_node_id - AND ca.value is not null - AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID - AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length' - AND ca.protocol_app_node_id = cb.protocol_app_node_id - AND cb.value is not null - AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID - AND otb.source_id IN ('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads' - AND ca.protocol_app_node_id = cc.protocol_app_node_id - AND cc.value is not null - AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID - AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage' - ) t - GROUP by Junctions_Pan_Id - - ; - - - - CREATE INDEX mpstats_pk_ix on MappingStatsGIJtmp - (junctions_pan_id,read_length,mapped_reads,avg_mapping_coverage,num_replicates) - - - ; - :CREATE_AND_POPULATE From 130a80536e82e00385fb5b3ae9699e25eb896a31 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 17:34:32 -0400 Subject: [PATCH 057/112] debug --- Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql | 2 +- Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql b/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql index 8dad2f7481..c91f81d84d 100644 --- 
a/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/ProteinSequence_ix.psql @@ -1,3 +1,3 @@ - create index ProtSeq_ix on :SCHEMA._ORG_ABBREVProteinSequence (source_id, project_id) + create index ProtSeq_ix on :SCHEMA.:ORG_ABBREVProteinSequence (source_id, project_id) ; diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql index cc70739339..70a738e12d 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql @@ -6,7 +6,7 @@ substr(string_agg(uniprot_id, '+or+' order by uniprot_id), 1, 240) as uniprot_id_internal FROM (SELECT DISTINCT t.na_feature_id, dr.primary_identifier as uniprot_id FROM sres.DbRef dr, dots.DbRefNaFeature x, dots.Transcript t, - dots.genefeature gf, dots.nasequence nas + dots.genefeature gf, dots.nasequence nas, sres.ExternalDatabase d, sres.ExternalDatabaseRelease r WHERE dr.db_ref_id = x.DB_REF_ID AND t.parent_id = gf.na_feature_id From 8a2e733ae48581b1a7ae2eae14684bba3df31bd6 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 17:39:39 -0400 Subject: [PATCH 058/112] debug --- .../webready/orgSpecific/NameMappingGIJ.psql | 53 +++++++------------ 1 file changed, 18 insertions(+), 35 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql index 1dbca91c94..f4ff44a447 100644 --- a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql +++ b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql @@ -67,41 +67,24 @@ cb.value::NUMERIC as number_mapped_reads, cc.value::NUMERIC as avg_mapping_coverage FROM :SCHEMA.:ORG_ABBREVjunexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, - panio pj, results.nafeatureexpression nafe, study.protocolappnode pan - WHERE pj.output_pan_id in (select distinct protocol_app_node_id from 
apidb.intronjunction) - AND pj.input_pan_id = p.input_pan_id - AND p.output_pan_id = pan.protocol_app_node_id - AND pan.name like '%tpm - unique%' - AND p.output_pan_id = nafe.protocol_app_node_id - and pj.org_abbrev = ':ORG_ABBREV' - GROUP BY pj.output_pan_id, p.output_pan_id, pan.name - ORDER BY pj.output_pan_id - ) , stats AS ( - SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions, - sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier - FROM apidb.IntronJunction - WHERE unique_reads >= 1 - GROUP BY protocol_app_node_id - ), part AS ( - SELECT - ij.junctions_pan_id, ij.avg_value, stats.multiplier - , max(ij.expression_pan_id) OVER w as max_exp_pan_id - , max(ij.sample_name) OVER w as max_sample_Name - , max(ij.exp_name) OVER w as max_exp_name - FROM ij, stats - WHERE ij.junctions_pan_id = stats.protocol_app_node_id - WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) - ) - SELECT DISTINCT * FROM ( - SELECT junctions_pan_id - , first_value(max_exp_pan_id) OVER w1 as exp_pan_id - , first_value(max_sample_name) OVER w1 as sample_name - , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands - , multiplier - FROM part - WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) - ) t - ORDER BY junctions_pan_id + :SCHEMA.PANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb, + STUDY.CHARACTERISTIC cc, sres.ontologyterm otc + WHERE je.junctions_pan_id = ioa.output_pan_id + AND ioa.input_pan_id = ca.protocol_app_node_id + AND ca.value is not null + and ioa.org_abbrev = ':ORG_ABBREV' + AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID + AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length' + AND ca.protocol_app_node_id = cb.protocol_app_node_id + AND cb.value is not null + AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID + AND otb.source_id IN 
('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads' + AND ca.protocol_app_node_id = cc.protocol_app_node_id + AND cc.value is not null + AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID + AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage' + ) t + GROUP by Junctions_Pan_Id ; From fc1c4701216faa461e7fe8096b7e71ef18fb8396 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 17:50:42 -0400 Subject: [PATCH 059/112] debug --- Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql | 5 +---- .../lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql index f4ff44a447..d21a1b1cf4 100644 --- a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql +++ b/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql @@ -90,9 +90,6 @@ :CREATE_AND_POPULATE - - - CREATE TABLE NameMappingGIJ AS SELECT DISTINCT edp.dataset_presenter_display_name as exp_name, edp.external_database_name, je.sample_name, je.junctions_pan_id, je.exp_pan_id, @@ -101,7 +98,7 @@ substr(uj.value, 1, 4000) as include_unified_junctions, ms.read_length, ms.mapped_reads, ms.avg_mapping_coverage, ms.num_replicates, je.switch_strands, je.multiplier - FROM junexpgijtmp je, study.nodeNodeSet sl, study.NodeSet s, ExternalDbDatasetPresenter edp, + FROM :SCHEMA.:ORG_ABBREVjunexpgijtmp je, study.nodeNodeSet sl, study.NodeSet s, ExternalDbDatasetPresenter edp, DatasetProperty dp, DatasetProperty sj, DatasetProperty uj, mappingstatsgijtmp ms WHERE sl.protocol_app_node_id = je.junctions_pan_id AND je.junctions_pan_id = ms.junctions_pan_id diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql index 70a738e12d..ebfb21590a 100644 --- 
a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql @@ -76,7 +76,7 @@ gsa.sequence_type, gsa.chromosome_order_num, gsa.na_sequence_id FROM dots.GeneFeature gf - INNER JOIN dots.nasequence seq ON seq.na_sequence_id = gf.na_sequence_id and nas.taxon_id = :TAXON_ID + INNER JOIN dots.nasequence seq ON seq.na_sequence_id = gf.na_sequence_id and seq.taxon_id = :TAXON_ID INNER JOIN apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id INNER JOIN :SCHEMA.GeneLocations gloc ON gf.source_id = gloc.source_id and gloc.org_abbrev = ':ORG_ABBREV' From 26020b15a0764bd8b9e9ea19e4c97eceec066498 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 17:59:54 -0400 Subject: [PATCH 060/112] debug --- Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 -- 1 file changed, 2 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql index ebfb21590a..73c8db86ef 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql @@ -37,7 +37,6 @@ COALESCE(preferred_name.name, any_name.name) AS gene_name, cast(coalesce(preferred_gene_product.product, any_gene_product.product, gf.product) as VARCHAR(300)) as old_gene_product, - COALESCE(gp.product, 'unspecified product') as gene_product, REPLACE(so.name, '_', ' ') AS gene_type, gf.name as gene_ebi_biotype, gi.gene_id, @@ -80,7 +79,6 @@ INNER JOIN apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id INNER JOIN :SCHEMA.GeneLocations gloc ON gf.source_id = gloc.source_id and gloc.org_abbrev = ':ORG_ABBREV' - LEFT JOIN :SCHEMA.GeneProduct gp ON gf.source_id = gp.source_id and gp.org_abbrev = 
':ORG_ABBREV' INNER JOIN sres.ExternalDatabaseRelease edr ON gf.external_database_release_id = edr.external_database_release_id INNER JOIN sres.ExternalDatabase ed ON edr.external_database_id = ed.external_database_id INNER JOIN :SCHEMA.GenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id and gsa.org_abbrev = ':ORG_ABBREV' From 67bbd35761a90bc5b444d8172f04911b1ac24abf Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 18:01:18 -0400 Subject: [PATCH 061/112] debug --- .../psql/webready/{orgSpecific => unknown}/NameMappingGIJ.psql | 0 .../psql/webready/{orgSpecific => unknown}/NameMappingGIJ_ix.psql | 0 .../webready/{orgSpecific => unknown}/SequenceEnzymeClass.psql | 0 .../webready/{orgSpecific => unknown}/SequenceEnzymeClass_ix.psql | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename Model/lib/psql/webready/{orgSpecific => unknown}/NameMappingGIJ.psql (100%) rename Model/lib/psql/webready/{orgSpecific => unknown}/NameMappingGIJ_ix.psql (100%) rename Model/lib/psql/webready/{orgSpecific => unknown}/SequenceEnzymeClass.psql (100%) rename Model/lib/psql/webready/{orgSpecific => unknown}/SequenceEnzymeClass_ix.psql (100%) diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql b/Model/lib/psql/webready/unknown/NameMappingGIJ.psql similarity index 100% rename from Model/lib/psql/webready/orgSpecific/NameMappingGIJ.psql rename to Model/lib/psql/webready/unknown/NameMappingGIJ.psql diff --git a/Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql b/Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql similarity index 100% rename from Model/lib/psql/webready/orgSpecific/NameMappingGIJ_ix.psql rename to Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql diff --git a/Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass.psql b/Model/lib/psql/webready/unknown/SequenceEnzymeClass.psql similarity index 100% rename from Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass.psql rename to 
Model/lib/psql/webready/unknown/SequenceEnzymeClass.psql diff --git a/Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass_ix.psql b/Model/lib/psql/webready/unknown/SequenceEnzymeClass_ix.psql similarity index 100% rename from Model/lib/psql/webready/orgSpecific/SequenceEnzymeClass_ix.psql rename to Model/lib/psql/webready/unknown/SequenceEnzymeClass_ix.psql From 23393e593f72e0d4b632f7bb2c170decf0ae82be Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 22 May 2025 18:04:42 -0400 Subject: [PATCH 062/112] add comments --- Model/lib/xml/tuningManager/webtables.org | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index 0c80030b59..04e3ae5720 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -38,6 +38,7 @@ - [X] ProteinAttributes.psql - [ ] TranscriptAttributes_ix.psql - [ ] TranscriptAttributes.psql + - no longer has products column because that is done by TM - [X] CodingSequence_ix.psql - [X] CodingSequence.psql - [X] IntronUtrCoords_ix.psql @@ -132,6 +133,7 @@ - [ ] GeneIntronJunction.psql - [ ] NameMappingGIJ_ix.psql - [ ] NameMappingGIJ.psql + - needs to be in TM, depends on dataset presenters - [ ] GeneMaxIntronGIJ_ix.psql - should be aux table and dropped - [ ] GeneMaxIntronGIJ.psql From 3372f5fdc2026ec85c057c3cca2a87cbb81e6478 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 10:10:49 -0400 Subject: [PATCH 063/112] debug --- Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql index 73c8db86ef..60d85bd95a 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql @@ -293,7 +293,7 @@ end ) as VARCHAR(300) ) 
as old_gene_product, - genefeat.gene_product, +-- genefeat.gene_product, genefeat.gene_type, genefeat.gene_ebi_biotype, genefeat.gene_id, From f17c412b51730a65810658909148af92fea4b495 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 10:41:02 -0400 Subject: [PATCH 064/112] debug --- Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql index 60d85bd95a..48093a587b 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql @@ -364,7 +364,8 @@ pa.molecular_weight, pa.isoelectric_point, pa.signalp_peptide, - pa.ec_numbers, pa.ec_numbers_derived, + pa.ec_numbers, + --pa.ec_numbers_derived, pa.annotated_go_component, pa.annotated_go_function, pa.annotated_go_process, From 604dcc5161f61165771d2dc62a9c6d1f3519ff3b Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 10:47:14 -0400 Subject: [PATCH 065/112] debug --- Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql index 48093a587b..abe0104ceb 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql @@ -443,7 +443,7 @@ ; - UPDATE :ORG_ABBREVTranscriptAttributes + UPDATE :SCHEMA.TranscriptAttributes SET representative_transcript = source_id WHERE representative_transcript is null and org_abbrev = ':ORG_ABBREV' From 6431a17ab73069578221a75054c6ad354437393c Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 11:04:41 -0400 Subject: [PATCH 066/112] debug --- 
Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql | 2 +- .../psql/webready/orgSpecific/EstAlignmentGeneSummary.psql | 2 +- Model/lib/psql/webready/orgSpecific/GeneAttributes.psql | 4 +++- Model/lib/psql/webready/orgSpecific/GeneModelDump.psql | 2 +- Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql | 1 - Model/lib/xml/tuningManager/webtables.org | 1 + 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql index 4e1996000d..843400b3e9 100644 --- a/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql +++ b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql @@ -1,6 +1,6 @@ :CREATE_AND_POPULATE SELECT DISTINCT ta.source_id, ta.gene_source_id, ta.project_id, sr.protocol_app_node_id, - ta.project_id, ta.org_abbrev, current_timestamp as modification_date + ta.project_id, ta.org_abbrev, current_timestamp as modification_date, CASE WHEN ta.is_reversed = 0 THEN round(abs(ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start)),0) diff --git a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql index dc2936487b..c4546c4fea 100644 --- a/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql +++ b/Model/lib/psql/webready/orgSpecific/EstAlignmentGeneSummary.psql @@ -50,7 +50,7 @@ /* (because they overlap no genes) */ SELECT ba.blat_alignment_id FROM dots.BlatAlignment ba, dots.NaSequence query_sequence, - sres.OntologyTerm so, dots.NaSequence target_sequence, + sres.OntologyTerm so, dots.NaSequence target_sequence WHERE query_sequence.na_sequence_id = ba.query_na_sequence_id AND query_sequence.sequence_ontology_id = so.ontology_term_id AND ba.target_na_sequence_id = target_sequence.na_sequence_id diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql 
b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql index 490b20f223..61fe20efd0 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql @@ -60,7 +60,9 @@ , ta.gene_locations as locations FROM :SCHEMA.TranscriptAttributes ta INNER JOIN ( - SELECT gene_source_id, MIN(is_pseudo) AS is_pseudo, MIN(gene_product) AS product, + SELECT gene_source_id, MIN(is_pseudo) AS is_pseudo, + --MIN(gene_product) AS product, + 'FIX ME' AS product, substr(STRING_AGG(transcript_product, ',' order by transcript_product), 1, 240) as transcript_product FROM :SCHEMA.TranscriptAttributes WHERE org_abbrev = ':ORG_ABBREV' diff --git a/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql b/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql index 2ee1bcdf1e..79eab5a4d7 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneModelDump.psql @@ -8,7 +8,7 @@ gm.type, gl.is_reversed FROM apidb.FeatureLocation gl, dots.NaSequence s, - SCHEMA.TranscriptAttributes ta, + :SCHEMA.TranscriptAttributes ta, ( SELECT CASE el.feature_type WHEN 'ExonFeature' THEN 'Exon' ELSE el.feature_type END as type, el.parent_id as na_feature_id, el.start_min as start_min, el.end_max as end_max diff --git a/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql index d8915ea386..0a6b669d8f 100644 --- a/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql +++ b/Model/lib/psql/webready/orgSpecific/TransmembraneDomains.psql @@ -4,7 +4,6 @@ , current_timestamp as modification_date , ta.source_id as transcript_source_id , ta.gene_source_id AS gene_source_id - , ta.project_id , tmf.topology AS tmf_topology , aal.start_min AS tmf_start_min , aal.end_max AS tmf_end_max diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index 04e3ae5720..d8eee266c7 100644 --- 
a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -58,6 +58,7 @@ - [X] GeneId.psql - [X] GeneAttributes_ix.psql - [X] GeneAttributes.psql + - fix gene_product - [X] GeneCopyNumbers_ix.psql - [X] GeneCopyNumbers.psql - [X] GeneGoTable_ix.psql From ede9ee692be4a865bd48a52ea008a5080daf297e Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 11:31:10 -0400 Subject: [PATCH 067/112] debug --- Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql | 2 +- Model/lib/psql/webready/orgSpecific/GeneAttributes.psql | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql index 843400b3e9..a29541ffe3 100644 --- a/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql +++ b/Model/lib/psql/webready/orgSpecific/ChIPchipTranscript.psql @@ -1,5 +1,5 @@ :CREATE_AND_POPULATE - SELECT DISTINCT ta.source_id, ta.gene_source_id, ta.project_id, sr.protocol_app_node_id, + SELECT DISTINCT ta.source_id, ta.gene_source_id, sr.protocol_app_node_id, ta.project_id, ta.org_abbrev, current_timestamp as modification_date, CASE WHEN ta.is_reversed = 0 diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql index 61fe20efd0..8ad8a6f0d0 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql @@ -12,7 +12,8 @@ , sequence_id , gene_name AS name , COALESCE(aggregates.product, aggregates.transcript_product) as old_product - , COALESCE(gp.product, 'unspecified product') as product + -- , COALESCE(gp.product, 'unspecified product') as product + , 'FIX ME' as product , gene_type , gene_ebi_biotype , gene_id @@ -88,7 +89,7 @@ ) t GROUP BY na_feature_id ) uniprot ON ta.gene_na_feature_id = uniprot.na_feature_id - LEFT JOIN :ORG_ABBREVGeneProduct gp ON 
ta.gene_source_id = gp.source_id +-- LEFT JOIN :ORG_ABBREVGeneProduct gp ON ta.gene_source_id = gp.source_id WHERE ta.org_abbrev = ':ORG_ABBREV' ORDER BY ta.gene_source_id :DECLARE_PARTITION; From d9acbfeaa675f0e6308d6d65062e94974f9233e8 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 11:39:59 -0400 Subject: [PATCH 068/112] debug --- .../psql/webready/orgSpecific/GeneAttributes.psql | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql index 8ad8a6f0d0..de455ff94a 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql @@ -44,12 +44,12 @@ , gene_context_start as context_start , gene_context_end as context_end , orthomcl_name - , gene_total_hts_snps as total_hts_snps - , gene_hts_nonsynonymous_snps as hts_nonsynonymous_snps - , gene_hts_stop_codon_snps as hts_stop_codon_snps - , gene_hts_noncoding_snps as hts_noncoding_snps - , gene_hts_synonymous_snps as hts_synonymous_snps - , gene_hts_nonsyn_syn_ratio as hts_nonsyn_syn_ratio + -- , gene_total_hts_snps as total_hts_snps + -- , gene_hts_nonsynonymous_snps as hts_nonsynonymous_snps + -- , gene_hts_stop_codon_snps as hts_stop_codon_snps + -- , gene_hts_noncoding_snps as hts_noncoding_snps + -- , gene_hts_synonymous_snps as hts_synonymous_snps + -- , gene_hts_nonsyn_syn_ratio as hts_nonsyn_syn_ratio , comment_string , uniprot.uniprot_id , uniprot.uniprot_id_internal From d10d7ef1dce8665498ea17ab7458ccba8966f5d1 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 11:44:18 -0400 Subject: [PATCH 069/112] debug --- Model/lib/psql/webready/orgSpecific/GeneAttributes.psql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql index 
de455ff94a..150593e12d 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql @@ -109,12 +109,12 @@ UPDATE :SCHEMA.GeneAttributes ga SET strain_count = ( SELECT strain_count - FROM :SCHEMA.:ORG_ABBREVSpeciesInfo si + FROM :SCHEMA.:ORG_ABBREVSpeciesInfoTmp si WHERE si.genus_species = ga.genus_species ) WHERE org_abbrev = ':ORG_ABBREV' ; -drop table :SCHEMA.:ORG_ABBREVSpeciesInfo +drop table :SCHEMA.:ORG_ABBREVSpeciesInfoTmp ; From 0843462064a7861708e5cf1844e044259861a795 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 11:51:30 -0400 Subject: [PATCH 070/112] debug --- Model/lib/psql/webready/orgSpecific/GeneAttributes.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql index 150593e12d..9f8295b7cc 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql @@ -94,7 +94,7 @@ ORDER BY ta.gene_source_id :DECLARE_PARTITION; - + drop table if exists :SCHEMA.:ORG_ABBREVSpeciesInfoTmp; CREATE unlogged TABLE :SCHEMA.:ORG_ABBREVSpeciesInfoTmp as SELECT genus_species, count(distinct organism) as strain_count From 1ab9d38d58634824ba31b872b8c614411c8f3ef7 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 12:03:32 -0400 Subject: [PATCH 071/112] debug --- Model/lib/psql/webready/orgSpecific/TFBSGene.psql | 2 +- Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql index 882cf31ab9..db5a378683 100644 --- a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql +++ b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql @@ -2,7 +2,7 @@ SELECT DISTINCT ':PROJECT_ID' as project_id, ':ORG_ABBREV' 
as org_abbrev, - current_timestamp as modification_date + current_timestamp as modification_date, ga.source_id as gene_source_id, ga.organism as organism, ga.genus_species as species, diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql index 1490621a67..c585c753db 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql @@ -40,9 +40,12 @@ AND (tec.ec_number_3 = pec.ec_number_3 or tec.ec_number_3 is null or pec.ec_number_3 is null) AND (tec.ec_number_4 = pec.ec_number_4 or tec.ec_number_4 is null or pec.ec_number_4 is null) ) - SELECT DISTINCT ga.source_id + SELECT DISTINCT + ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev + , CURRENT_TIMESTAMP as modification_date + , ga.source_id , ga.gene_source_id - , ga.project_id , pa.source_id as pathway_source_id , pa.name as pathway_name , ec_match.ec_number_transcript as ec_number_gene From 7f533fee902bd1bf5553fe7ff39179abc1949d09 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 12:23:05 -0400 Subject: [PATCH 072/112] debug --- Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql | 7 +++---- .../lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 8 +++++--- Model/lib/psql/webready/orgSpecific/TFBSGene.psql | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql index f12202677e..cc66b15a5c 100644 --- a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql +++ b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql @@ -1,13 +1,12 @@ :CREATE_AND_POPULATE - - CREATE TABLE PathwayNodeGene as SELECT DISTINCT pn.pathway_node_id - , tp.gene_source_id - FROM transcriptpathway tp + , tp.gene_source_id, tp.project_id, tp.org_abbrev, current_timestamp as modification_date, + FROM 
:SCHEMA.transcriptpathway tp , sres.pathwaynode pn WHERE tp.pathway_id = pn.pathway_id AND tp.ec_number_gene like replace(pn.display_label, '-', '%') + and tp.org_abbrev = ':ORG_ABBREV' :DECLARE_PARTITION; diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql index e792016369..03e8516845 100644 --- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql +++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql @@ -2,9 +2,10 @@ CREATE TABLE PathwaysGeneTable AS - SELECT * FROM ( + SELECT t2.*, current_timestamp as modification_date FROM ( SELECT DISTINCT gene_source_id , project_id + , tp.org_abbrev , pathway_source_id , pathway_name , count(reaction_source_id) as reactions @@ -22,7 +23,7 @@ , pr.expasy_url , tp.pathway_source , CASE max(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match - FROM TranscriptPathway tp + FROM :SCHEMA.TranscriptPathway tp , PathwayAttributes pa , PathwayCompounds pc , PathwayReactions pr @@ -33,7 +34,8 @@ AND tp.ec_number_pathway = pr.enzyme AND tp.wildcard_count_gene <= tp.wildcard_count_pathway AND pr.enzyme != '-.-.-.-' - GROUP BY tp.gene_source_id, tp.project_id, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source + AND tp.org_abbrev = ':ORG_ABBREV' + GROUP BY tp.gene_source_id, tp.project_id, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source ) t GROUP BY gene_source_id, project_id, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match ) t2 diff --git a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql index db5a378683..88a75f3f2b 100644 --- a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql +++ b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql @@ -1,8 +1,8 @@ :CREATE_AND_POPULATE + select t,*, current_timestamp as modification_date from 
( SELECT DISTINCT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, - current_timestamp as modification_date, ga.source_id as gene_source_id, ga.organism as organism, ga.genus_species as species, @@ -41,7 +41,7 @@ AND ga.org_abbrev = ':ORG_ABBREV' AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000) or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) ) - + ) t :DECLARE_PARTITION; From 4c14c17d87d7910155e1b24a03149537695e3bdd Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 12:45:08 -0400 Subject: [PATCH 073/112] debug --- Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql | 2 +- Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 3 --- Model/lib/psql/webready/orgSpecific/TFBSGene.psql | 7 +++---- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql index cc66b15a5c..e0d672aa7c 100644 --- a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql +++ b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql @@ -1,7 +1,7 @@ :CREATE_AND_POPULATE SELECT DISTINCT pn.pathway_node_id - , tp.gene_source_id, tp.project_id, tp.org_abbrev, current_timestamp as modification_date, + , tp.gene_source_id, tp.project_id, tp.org_abbrev, current_timestamp as modification_date FROM :SCHEMA.transcriptpathway tp , sres.pathwaynode pn WHERE tp.pathway_id = pn.pathway_id diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql index 03e8516845..4730dcaf00 100644 --- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql +++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql @@ -1,7 +1,4 @@ :CREATE_AND_POPULATE - - - CREATE TABLE PathwaysGeneTable AS SELECT t2.*, current_timestamp as 
modification_date FROM ( SELECT DISTINCT gene_source_id , project_id diff --git a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql index 88a75f3f2b..52fa02bb4d 100644 --- a/Model/lib/psql/webready/orgSpecific/TFBSGene.psql +++ b/Model/lib/psql/webready/orgSpecific/TFBSGene.psql @@ -1,8 +1,8 @@ :CREATE_AND_POPULATE - select t,*, current_timestamp as modification_date from ( SELECT DISTINCT ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, + current_timestamp as modification_date, ga.source_id as gene_source_id, ga.organism as organism, ga.genus_species as species, @@ -31,8 +31,8 @@ THEN '-' ELSE '+' END - END as direction, - aef.* + END as direction + -- , aef.* FROM dots.BindingSiteFeature aef, apidb.FeatureLocation arrloc, :SCHEMA.GeneAttributes ga @@ -41,7 +41,6 @@ AND ga.org_abbrev = ':ORG_ABBREV' AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000) or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) ) - ) t :DECLARE_PARTITION; From 5a254fe2cd32c595b74f4763ed56a17cbcc39845 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 13:03:13 -0400 Subject: [PATCH 074/112] debug --- Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 6 +++--- Model/lib/xml/tuningManager/webtables.org | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql index 4730dcaf00..2dc24298d4 100644 --- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql +++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql @@ -21,9 +21,9 @@ , tp.pathway_source , CASE max(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match FROM :SCHEMA.TranscriptPathway tp - , PathwayAttributes pa - , PathwayCompounds pc - , PathwayReactions pr 
+ , :SCHEMA.PathwayAttributes pa + , :SCHEMA.PathwayCompounds pc + , :SCHEMA.PathwayReactions pr WHERE tp.pathway_id = pa.pathway_id AND pc.pathway_id = pa.pathway_id AND pr.reaction_id = pc.reaction_id diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index d8eee266c7..d622483647 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -73,6 +73,7 @@ - [X] GeneSummaryFilter.psql - [X] TFBSGene_ix.psql - [X] TFBSGene.psql + - removed aef.* - [ ] PathwayNodeGene_ix.psql - This may need to move to comparative genomics because we need the OrthoDerived EC mappings - [ ] PathwayNodeGene.psql From 8480a7004f8bf1a6090aedba31fe2d153ae2035c Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 13:11:28 -0400 Subject: [PATCH 075/112] debug --- Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql index 2dc24298d4..213386f1b0 100644 --- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql +++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql @@ -2,7 +2,7 @@ SELECT t2.*, current_timestamp as modification_date FROM ( SELECT DISTINCT gene_source_id , project_id - , tp.org_abbrev + , org_abbrev , pathway_source_id , pathway_name , count(reaction_source_id) as reactions @@ -15,6 +15,7 @@ , tp.project_id , tp.pathway_source_id , tp.pathway_name + , tp.org_abbrev , pr.reaction_source_id , pr.enzyme , pr.expasy_url From 7fc4ad7cc36e038f7d51d68b4e032cfda0335d66 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 13:24:33 -0400 Subject: [PATCH 076/112] debug --- Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql index 213386f1b0..ca73bc0469 100644 --- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql +++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql @@ -33,9 +33,9 @@ AND tp.wildcard_count_gene <= tp.wildcard_count_pathway AND pr.enzyme != '-.-.-.-' AND tp.org_abbrev = ':ORG_ABBREV' - GROUP BY tp.gene_source_id, tp.project_id, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source + GROUP BY tp.gene_source_id, tp.project_id, tp.org_abbrevtp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source ) t - GROUP BY gene_source_id, project_id, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match + GROUP BY gene_source_id, project_id, org_abbrev, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match ) t2 ORDER BY pathway_source, lower(pathway_name) From 90c0ff88b27e4f47ec3ade6264796954c58c6710 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 13:30:40 -0400 Subject: [PATCH 077/112] debug --- Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql index ca73bc0469..21041cb21c 100644 --- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql +++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql @@ -33,7 +33,7 @@ AND tp.wildcard_count_gene <= tp.wildcard_count_pathway AND pr.enzyme != '-.-.-.-' AND tp.org_abbrev = ':ORG_ABBREV' - GROUP BY tp.gene_source_id, tp.project_id, tp.org_abbrevtp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source + GROUP BY tp.gene_source_id, tp.project_id, tp.org_abbrev, tp.pathway_name, 
tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source ) t GROUP BY gene_source_id, project_id, org_abbrev, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match ) t2 From c93978e3edaab4a48ce8aa6ddf31c986b6a18427 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 23 May 2025 16:40:07 -0400 Subject: [PATCH 078/112] debug --- Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql index 1f0e0f78e9..ef804fdad7 100644 --- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql @@ -215,5 +215,5 @@ drop table :SCHEMA.:ORG_ABBREVGoTermList_tmp; drop table :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp; -drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEc_tmp; ---drop table :SCHEMA.:ORG_ABBREVtProteinAttrsEcDerived_tmp; +drop table :SCHEMA.:ORG_ABBREVProteinAttrsEc_tmp; +--drop table :SCHEMA.:ORG_ABBREVProteinAttrsEcDerived_tmp; From 376f8bab6e3844fa87049750d305a153089301ea Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 27 May 2025 13:51:10 -0400 Subject: [PATCH 079/112] add TaxonTree --- Model/lib/psql/webready/global/TaxonTree.psql | 7 +++++++ .../lib/psql/webready/global/TaxonTree_ix.psql | 2 ++ .../psql/webready/orgSpecific/Taxonomy.psql | 18 +++++------------- 3 files changed, 14 insertions(+), 13 deletions(-) create mode 100644 Model/lib/psql/webready/global/TaxonTree.psql create mode 100644 Model/lib/psql/webready/global/TaxonTree_ix.psql diff --git a/Model/lib/psql/webready/global/TaxonTree.psql b/Model/lib/psql/webready/global/TaxonTree.psql new file mode 100644 index 0000000000..d09a10167a --- /dev/null +++ b/Model/lib/psql/webready/global/TaxonTree.psql @@ -0,0 +1,7 @@ + CREATE TABLE :SCHEMA.TaxonTree as + SELECT t.taxon_id, 
t.parent_id, t.ncbi_tax_id, + cast(tn.name as varchar(80)) as name, + cast(t.rank as varchar(24)) as rank + FROM sres.Taxon t, sres.TaxonName tn + WHERE t.taxon_id = tn.taxon_id + AND tn.name_class = 'scientific name' diff --git a/Model/lib/psql/webready/global/TaxonTree_ix.psql b/Model/lib/psql/webready/global/TaxonTree_ix.psql new file mode 100644 index 0000000000..4e31478a22 --- /dev/null +++ b/Model/lib/psql/webready/global/TaxonTree_ix.psql @@ -0,0 +1,2 @@ + CREATE INDEX taxontree_idx ON :SCHEMA.taxontree (taxon_id, name) + ; diff --git a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql index 87f0a5c1d9..66a639f094 100644 --- a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql +++ b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql @@ -1,21 +1,13 @@ :CREATE_AND_POPULATE WITH RECURSIVE cte AS ( - WITH tax AS( - SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id, - cast(tn.name as varchar(80)) as name, - cast(t.rank as varchar(24)) as rank - FROM sres.Taxon t, sres.TaxonName tn - WHERE t.taxon_id = tn.taxon_id - AND tn.name_class = 'scientific name' - ) SELECT tax.*, name as organism, ARRAY[taxon_id::numeric] as path - FROM tax + FROM :SCHEMA.taxontree WHERE taxon_id = :TAXON_ID UNION - SELECT tax.*, cte.organism, cte.path || tax.taxon_id as path - FROM tax, cte - WHERE cte.parent_id = tax.taxon_id - AND tax.name != 'root' + SELECT tt.*, cte.organism, cte.path || tt.taxon_id as path + FROM :SCHEMA.taxontree tt, cte + WHERE cte.parent_id = tt.taxon_id + AND tt.name != 'root' ) SELECT taxon_id, parent_id, ncbi_tax_id, name, rank, organism, row_number() over() as orderNum, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date From c1ca624e3e5f7c20b2df9718e4f90258bd4b3bb9 Mon Sep 17 00:00:00 2001 From: bindu Date: Wed, 28 May 2025 08:56:22 -0400 Subject: [PATCH 080/112] fix SQL, as in #8aca696 --- .../lib/psql/webready/orgSpecific/TranscriptPathway.psql | 8 ++------ 1 file 
changed, 2 insertions(+), 6 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql index c585c753db..3280209fa6 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql @@ -1,9 +1,7 @@ :CREATE_AND_POPULATE WITH transcript_ec AS ( SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4, - -- CHECK AND FIX - -- regexp_count( ec.ec_number, '-') as wildcard_count - count( ec.ec_number) as wildcard_count + regexp_count( ec.ec_number, '-') as wildcard_count FROM sres.EnzymeClass ec WHERE enzyme_class_id IN (SELECT aseqEc.enzyme_class_id FROM dots.AaSequenceEnzymeClass aseqEc, dots.aasequence seq @@ -20,9 +18,7 @@ ), pathway_ec AS ( SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4, - -- CHECK AND FIX - -- regexp_count( ec.ec_number, '-') as wildcard_count - count( ec.ec_number) as wildcard_count + regexp_count( ec.ec_number, '-') as wildcard_count FROM sres.EnzymeClass ec WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM pathway_node_ec) GROUP BY ec.enzyme_class_id From f9fabd48dc5ee9de854e8870ca740b031bfe5f9e Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Wed, 28 May 2025 11:18:20 -0400 Subject: [PATCH 081/112] add group tables --- .../comparative/GroupDomainDescriptions.psql | 20 ++++ .../GroupDomainDescriptions_ix.psql | 2 + .../comparative/ProteinDomainAssignment.psql | 38 ++++++++ .../ProteinDomainAssignment_ix.psql | 11 +++ .../comparative/ProteinSequenceGroup.psql | 92 +++++++++++++++++++ .../comparative/ProteinSequenceGroup_ix.sql | 15 +++ 6 files changed, 178 insertions(+) create mode 100644 Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql create mode 100644 Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql create mode 100644 
Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql create mode 100644 Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql create mode 100644 Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql create mode 100644 Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql diff --git a/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql b/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql new file mode 100644 index 0000000000..f5d73f6208 --- /dev/null +++ b/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql @@ -0,0 +1,20 @@ +create table :SCHEMA.GroupDomainDescriptions as +SELECT og.group_id AS group_name, ag.descriptions +FROM apidb.OrthologGroup og, + (SELECT group_name, + STRING_AGG(accession ||' (' || num_proteins|| ')', ', ') AS descriptions + FROM (SELECT group_name, accession, num_proteins, rnk + FROM (SELECT group_name, accession, num_proteins, + rank() OVER (PARTITION BY group_name ORDER BY num_proteins DESC) rnk + FROM (SELECT group_name, accession, count(distinct full_id) AS num_proteins + FROM :SCHEMA.ProteinDomainAssignment + GROUP BY group_name,accession + ) + ) + WHERE rnk <= 3 + ) + GROUP BY group_name + ORDER BY 1 + ) ag +WHERE og.group_id = ag.group_name +) diff --git a/Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql b/Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql new file mode 100644 index 0000000000..27ed2e9f4f --- /dev/null +++ b/Model/lib/psql/webready/comparative/GroupDomainDescriptions_ix.psql @@ -0,0 +1,2 @@ +CREATE INDEX GroupDomainAttribute_idx ON :SCHEMA.GroupDomainDescriptions (group_name) +; \ No newline at end of file diff --git a/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql b/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql new file mode 100644 index 0000000000..ce18928333 --- /dev/null +++ b/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql @@ -0,0 +1,38 @@ + 
create table :SCHEMA.ProteinDomainAssignment as + select sa.full_id, sa.group_name, + r.interpro_primary_id as accession, + r.interpro_desc as description, + CAST (NULL as NUMERIC) as domain_index, + sa.aa_sequence_id, + r.interpro_start_min as start_min, + r.interpro_end_min as end_max + from :SCHEMA.ProteinSequenceGroup sa, apidb.interproresults r + where sa.full_id = r.protein_source_id + and upper(r.interpro_db_name) = 'PFAM' +; + + create index domain_accession_ix&1 + on :SCHEMA.ProteinDomainAssignment (accession, full_id, group_name) + ; + + drop table if exists :SCHEMA.domainIndex_tmp +; + + create table :SCHEMA.domainIndex_tmp as + select row_number() OVER () as domain_index, accession + from (select distinct accession + from :SCHEMA.ProteinDomainAssignment + order by accession) +; + + create index domainIdxIdx on :SCHEMA.DomainIndex_tmp(accession, domain_index) +; + + update :SCHEMA.ProteinDomainAssignment da + set domain_index = (select domain_index + from :SCHEMA.DomainIndex_tmp + where accession = da.accession) +; + + drop table :SCHEMA.domainIndex_tmp + ; \ No newline at end of file diff --git a/Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql b/Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql new file mode 100644 index 0000000000..7fe3cdc3c9 --- /dev/null +++ b/Model/lib/psql/webready/comparative/ProteinDomainAssignment_ix.psql @@ -0,0 +1,11 @@ + create index domain_ix_ix + on :SCHEMA.ProteinDomainAssignment (domain_index, accession, full_id) +; + + create index domain_group_ix + on :SCHEMA.ProteinDomainAssignment (group_name, accession, full_id) +; + + create index domain_seq_ix + on :SCHEMA.ProteinDomainAssignment (aa_sequence_id, accession, full_id, group_name) +; \ No newline at end of file diff --git a/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql b/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql new file mode 100644 index 0000000000..bfdd39fcc1 --- /dev/null +++ 
b/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql @@ -0,0 +1,92 @@ + create table :SCHEMA.ProteinSequenceGroup as + SELECT + aas.source_id AS full_id, + aas.source_id, + aas.aa_sequence_id, + length(aas.sequence) as length, + aas.description AS product, + aas.taxon_id, + + taxon.orthomcl_taxon_id, + taxon.taxon_group, + taxon.orthomcl_abbrev AS taxon_abbreviation, + taxon.name AS organism_name, + taxon.core_peripheral, + + o.group_id AS group_name, + o.ortholog_group_id, + o.number_of_members AS group_size, + o.number_of_core_members, + o.number_of_peripheral_members, + CASE is_residual WHEN 1 THEN 'Residual' + ELSE 'Core' END AS group_type, + urls.source_url, + urls.source_text + FROM + dots.AASequence aas, + apidb.orthologGroup o, + apidb.orthologGroupAASequence ogseq, + ( + SELECT o.orthomcl_abbrev, + o.taxon_id as orthomcl_taxon_id, + t.name, + t.core_peripheral, + t.taxon_group + FROM apidb.organism o, + (WITH RECURSIVE TaxonHierarchy AS ( + SELECT + three_letter_abbrev, + orthomcl_clade_id, + name, + core_peripheral, + name AS taxon_group, + parent_id + FROM apidb.OrthomclClade + WHERE name IN ('Archaea', 'Bacteria', 'Alveolates', 'Amoebozoa', 'Euglenozoa', + 'Fungi', 'Metazoa', 'Other Eukaryota', 'Viridiplantae') + UNION ALL + SELECT + child.three_letter_abbrev, + child.orthomcl_clade_id, + child.name, + child.core_peripheral, + parent.taxon_group, + child.parent_id + FROM apidb.OrthomclClade child + JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id + ) + SELECT three_letter_abbrev, taxon_group, name, core_peripheral + FROM TaxonHierarchy + WHERE core_peripheral IN ('C', 'P') + ) t + WHERE t.three_letter_abbrev = o.orthomcl_abbrev + ) taxon, + + ( + SELECT aas.aa_sequence_id, + CASE + WHEN ores.resource_name IN ('AmoebaDB','CryptoDB','FungiDB','GiardiaDB','HostDB','MicrosporidiaDB', + 'PlasmoDB','PiroplasmaDB','ToxoDB','TrichDB','TriTrypDB','VectorBase') + THEN SUBSTR(ores.resource_url, 0, strpos(ores.resource_url, 
'/downloads')) || 'record/gene/' + || aas.source_id + WHEN ores.resource_name = 'Uniprot' + THEN SUBSTR(ores.resource_url, 0, strpos(ores.resource_url, '/proteomes') ) || 'uniprot/' + || aas.source_id + ELSE '' END AS source_url, + CASE WHEN ores.resource_name IS NULL THEN '' + ELSE aas.source_id || ' (' || ores.resource_name || ')' END AS source_text + FROM dots.AaSequence aas, + apidb.organism ot, + apidb.orthomclresource ores + WHERE ot.taxon_id = ores.orthomcl_taxon_id + AND ot.taxon_id = aas.taxon_id) urls + WHERE aas.aa_sequence_id = ogseq.aa_sequence_id + AND ogseq.group_id = o.group_id + AND aas.aa_sequence_id = urls.aa_sequence_id + AND taxon.orthomcl_taxon_id = aas.taxon_id + AND aas.taxon_id in (select distinct(eas.taxon_id) from apidb.organism og, dots.aasequence eas where eas.taxon_id = og.taxon_id) +; + + alter table :SCHEMA.ProteinSequenceGroup + add constraint SeqAttrs_pk primary key (full_id) +; \ No newline at end of file diff --git a/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql b/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql new file mode 100644 index 0000000000..4a136d10f4 --- /dev/null +++ b/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql @@ -0,0 +1,15 @@ + create unique index PSG_idx ON :SCHEMA.ProteinSequenceGroup (full_id, group_name, taxon_id, source_id) +; + + create unique index PSG_gusIdx ON :SCHEMA.ProteinSequenceGroup (ortholog_group_id, aa_sequence_id) +; + + create unique index PSG_idx2 ON :SCHEMA.ProteinSequenceGroup (group_name, length desc, full_id, taxon_id) +; + + create unique index PSG_idx3 + on :SCHEMA.ProteinSequenceGroup (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id) + ; + + create unique index PSG_idx4 ON :SCHEMA.ProteinSequenceGroup (source_id, full_id, group_name, taxon_id) +; From 399b744d87de9c2683e88d361ccfdfbbea6845b2 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Wed, 28 May 2025 12:44:31 -0400 Subject: [PATCH 082/112] fix 
taxonomy --- Model/lib/psql/webready/orgSpecific/Taxonomy.psql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql index 66a639f094..e0c1b7a037 100644 --- a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql +++ b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql @@ -1,7 +1,7 @@ :CREATE_AND_POPULATE WITH RECURSIVE cte AS ( - SELECT tax.*, name as organism, ARRAY[taxon_id::numeric] as path - FROM :SCHEMA.taxontree + SELECT tt.*, name as organism, ARRAY[taxon_id::numeric] as path + FROM :SCHEMA.taxontree tt WHERE taxon_id = :TAXON_ID UNION SELECT tt.*, cte.organism, cte.path || tt.taxon_id as path From a8afab3ce7b006f4bf080c85ec99166e3b34fb08 Mon Sep 17 00:00:00 2001 From: bindu Date: Mon, 12 May 2025 17:16:31 -0400 Subject: [PATCH 083/112] fix sql as REGEXP_LIKE and REGEXP_COUNT are available in pg --- Model/lib/wdk/model/records/geneAttributeQueries.xml | 4 +--- Model/lib/wdk/model/records/transcriptAttributeQueries.xml | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Model/lib/wdk/model/records/geneAttributeQueries.xml b/Model/lib/wdk/model/records/geneAttributeQueries.xml index 7f7472cb24..3d69efb4c8 100644 --- a/Model/lib/wdk/model/records/geneAttributeQueries.xml +++ b/Model/lib/wdk/model/records/geneAttributeQueries.xml @@ -245,11 +245,9 @@ GROUP BY source_id ]]> - - diff --git a/Model/lib/wdk/model/records/transcriptAttributeQueries.xml b/Model/lib/wdk/model/records/transcriptAttributeQueries.xml index 388f9f234a..594f09496b 100644 --- a/Model/lib/wdk/model/records/transcriptAttributeQueries.xml +++ b/Model/lib/wdk/model/records/transcriptAttributeQueries.xml @@ -348,11 +348,9 @@ 'TOXO' ]]> - - From 2f384f30a7f09beb1d6b236f48e195b82e2bfe8f Mon Sep 17 00:00:00 2001 From: bindu Date: Wed, 14 May 2025 13:58:31 -0400 Subject: [PATCH 084/112] uncomment TranscriptGenomicSequence --- 
Model/lib/xml/tuningManager/apiTuningManager.xml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml index 6a783fcb6d..bd5806a592 100644 --- a/Model/lib/xml/tuningManager/apiTuningManager.xml +++ b/Model/lib/xml/tuningManager/apiTuningManager.xml @@ -2582,11 +2582,10 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id) - The genomic sequence of each transcript. Used in the transcript record / gene record page. - - COMMENTING OUT DEPENDENCIES FOR NOW @@ -2712,7 +2711,7 @@ sub readClob { ---> + From 8ac19101b8cd78badbdd5834e8a4c49a9ff715fe Mon Sep 17 00:00:00 2001 From: bindu Date: Wed, 14 May 2025 20:50:13 -0400 Subject: [PATCH 085/112] fix dependency for TranscriptGenomicSequence --- Model/lib/xml/tuningManager/apiTuningManager.xml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml index bd5806a592..b5b854f82d 100644 --- a/Model/lib/xml/tuningManager/apiTuningManager.xml +++ b/Model/lib/xml/tuningManager/apiTuningManager.xml @@ -2582,7 +2582,6 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id) - The genomic sequence of each transcript. Used in the transcript record / gene record page. 
@@ -2590,7 +2589,6 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id) - Date: Thu, 15 May 2025 13:43:01 -0400 Subject: [PATCH 086/112] Fix SQL for 3D structure predictions section on gene record page --- .../wdk/model/records/geneTableQueries.xml | 43 ++++++++----------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/Model/lib/wdk/model/records/geneTableQueries.xml b/Model/lib/wdk/model/records/geneTableQueries.xml index 16928d821b..583236c5bf 100644 --- a/Model/lib/wdk/model/records/geneTableQueries.xml +++ b/Model/lib/wdk/model/records/geneTableQueries.xml @@ -3759,30 +3759,25 @@ from apidbTuning.AllGeneProducts From 4662e325ca4eba08672bc93c990124ebd37d3670 Mon Sep 17 00:00:00 2001 From: Paul Wilkinson Date: Fri, 16 May 2025 06:03:04 -0400 Subject: [PATCH 087/112] Edited SQL to postgres to fix scRNA-Seq section on gene record page --- .../wdk/model/records/geneTableQueries.xml | 34 ++++++++----------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/Model/lib/wdk/model/records/geneTableQueries.xml b/Model/lib/wdk/model/records/geneTableQueries.xml index 583236c5bf..bcdf99863c 100644 --- a/Model/lib/wdk/model/records/geneTableQueries.xml +++ b/Model/lib/wdk/model/records/geneTableQueries.xml @@ -1762,25 +1762,21 @@ from ( From d17270224ceed9a52eee3a2f9798e92b231ea834 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Wed, 28 May 2025 14:47:34 -0400 Subject: [PATCH 088/112] debug --- Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql index ef804fdad7..77068d1b49 100644 --- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql @@ -169,7 +169,7 @@ LEFT JOIN :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp go ON tas.aa_sequence_id = 
go.aa_sequence_id LEFT JOIN ( SELECT aa_sequence_id, string_agg(peptide_sequence, ', ') peptide_sequence - FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :SCHEMA.:ORG_ABBREVSignalPeptideDomains) t + FROM (SELECT DISTINCT aa_sequence_id, peptide_sequence FROM :SCHEMA.SignalPeptideDomains where org_abbrev = ':ORG_ABBREV') t GROUP BY aa_sequence_id ) sigp ON tas.aa_sequence_id = sigp.aa_sequence_id LEFT JOIN ( @@ -207,7 +207,7 @@ - update :SCHEMA.:ORG_ABBREVProteinAttributes gaup + update :SCHEMA.ProteinAttributes_:ORG_ABBREV gaup set has_seqedit = 1 where source_id in (select source_id from apidb.seqedit) From 783a97d6c8844ecf88192b3a01b91382db1e1cd1 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Wed, 28 May 2025 15:44:37 -0400 Subject: [PATCH 089/112] debug --- Model/lib/psql/webready/comparative/AlphaFoldGenes.psql | 2 +- .../lib/psql/webready/comparative/GroupPhylogeneticProfile.psql | 2 +- Model/lib/psql/webready/comparative/OrthologousTranscripts.psql | 2 +- .../lib/psql/webready/comparative/ProteinDomainAssignment.psql | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql index 2e9c2c7e34..56d3a8f373 100644 --- a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql +++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql @@ -51,7 +51,7 @@ ; drop table if exists :SCHEMA.minrank - +; CREATE UNLOGGED TABLE :SCHEMA.minRank AS ( SELECT gene_source_id , MIN(rank) as min_rank diff --git a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql index ddb118b5e4..dcbb9370d4 100644 --- a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql +++ b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql @@ -3,7 +3,7 @@ SELECT rep.orthomcl_name, pp.profile_string FROM apidb.PhylogeneticProfile pp, (SELECT orthomcl_name, 
max(source_id) as source_id - FROM :SCHEM.GeneAttributes + FROM :SCHEMA.GeneAttributes GROUP BY orthomcl_name) rep WHERE rep.source_id = pp.source_id diff --git a/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql index e4766f036f..983e5239c3 100644 --- a/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql +++ b/Model/lib/psql/webready/comparative/OrthologousTranscripts.psql @@ -35,7 +35,7 @@ and ota.taxon_id = o.taxon_id ), syn_pairs - as (select na_feature_id, syn_na_feature_id, 1 as is_syntenic from SyntenicPairs + as (select na_feature_id, syn_na_feature_id, 1 as is_syntenic from :SCHEMA.SyntenicPairs ) select all_pairs.* , coalesce(syn_pairs.is_syntenic, 0) as is_syntenic diff --git a/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql b/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql index ce18928333..5ba8d01290 100644 --- a/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql +++ b/Model/lib/psql/webready/comparative/ProteinDomainAssignment.psql @@ -11,7 +11,7 @@ and upper(r.interpro_db_name) = 'PFAM' ; - create index domain_accession_ix&1 + create index domain_accession_ix on :SCHEMA.ProteinDomainAssignment (accession, full_id, group_name) ; From 8b3a7bd55caa9ce1c2b4c495e0ef89ea151f4e48 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Wed, 28 May 2025 15:54:28 -0400 Subject: [PATCH 090/112] debug --- Model/lib/psql/webready/comparative/AlphaFoldGenes.psql | 2 +- .../lib/psql/webready/comparative/GroupDomainDescriptions.psql | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql index 56d3a8f373..b442e08705 100644 --- a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql +++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql @@ -92,6 +92,6 @@ ; drop table if exists 
:SCHEMA.uniprotgenes; - drop table if exists :SCHEMA.minrank + drop table if exists :SCHEMA.minrank; drop table if exists :SCHEMA.alphafoldhits; diff --git a/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql b/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql index f5d73f6208..919cb8a61a 100644 --- a/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql +++ b/Model/lib/psql/webready/comparative/GroupDomainDescriptions.psql @@ -17,4 +17,5 @@ FROM apidb.OrthologGroup og, ORDER BY 1 ) ag WHERE og.group_id = ag.group_name -) + +; \ No newline at end of file From a60fd41f47368f1ce3171e5e3ee91b78a8011c64 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 29 May 2025 11:34:17 -0400 Subject: [PATCH 091/112] debug --- Model/lib/psql/webready/comparative/AlphaFoldGenes.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql index b442e08705..a0702a3dfa 100644 --- a/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql +++ b/Model/lib/psql/webready/comparative/AlphaFoldGenes.psql @@ -1,6 +1,6 @@ drop table if exists :SCHEMA.uniprotgenes; - CREATE TABLE :SCHEMA.uniprotGenes AS + CREATE UNLOGGED TABLE :SCHEMA.uniprotGenes AS SELECT DISTINCT ed.name , d.* , edr.version From c38468993b29473e8bdb4880b5e8c161ab63f5e4 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 29 May 2025 17:05:22 -0400 Subject: [PATCH 092/112] fix taxonid bug --- Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql | 2 +- Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql | 2 +- Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql index 29964ad4b5..09f86475ac 100644 --- 
a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql @@ -63,7 +63,7 @@ WHERE sequence.taxon_id = taxon.taxon_id AND sequence.sequence_ontology_id = so.ontology_term_id - AND (sequence.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0) + AND (sequence.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0) AND so.name IN ('random_sequence', 'chromosome', 'contig', 'supercontig','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle') ORDER BY organism, source_id diff --git a/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql b/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql index 6da04c90fb..e5b60d5e59 100644 --- a/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql +++ b/Model/lib/psql/webready/orgSpecific/SequencePieceClosure.psql @@ -13,7 +13,7 @@ 1 AS edge_level FROM dots.SequencePiece sp, dots.NaSequence ns WHERE sp.piece_na_sequence_id = ns.na_sequence_id - AND (ns.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0) + AND (ns.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0) ; diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql index abe0104ceb..1e47c1624b 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql @@ -204,7 +204,7 @@ ) olds ON gf.na_feature_id = olds.na_feature_id WHERE nl.is_top_level = 1 AND nl.feature_type = 'GeneFeature' - AND (gsa.taxon_id::varchar = ':TAXON_IDValue' OR length(':TAXON_IDValue') = 0) + AND (gsa.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0) AND species_name.name_class = 'scientific name' AND (gf.is_predicted != 1 OR gf.is_predicted is null) AND tn.name_class = 'scientific name' From 98410cb898024711bc2b5ba412b166d72d350080 Mon Sep 17 
00:00:00 2001 From: steve-fischer-200 Date: Mon, 2 Jun 2025 11:12:37 -0400 Subject: [PATCH 093/112] debug --- Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql index 77068d1b49..edf94bd30e 100644 --- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql @@ -207,9 +207,10 @@ - update :SCHEMA.ProteinAttributes_:ORG_ABBREV gaup + update :SCHEMA.ProteinAttributes gaup set has_seqedit = 1 where source_id in (select source_id from apidb.seqedit) + and org_abbrev = ':ORG_ABBREV' ; From a37bae33f0fcee69cec834d86f925c39ce3e1712 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Mon, 2 Jun 2025 12:40:57 -0400 Subject: [PATCH 094/112] debug --- Model/lib/psql/webready/orgSpecific/GeneId.psql | 4 ++-- .../lib/psql/webready/orgSpecific/GeneIntronJunction.psql | 8 ++++---- .../lib/psql/webready/orgSpecific/ProteinAttributes.psql | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/GeneId.psql b/Model/lib/psql/webready/orgSpecific/GeneId.psql index 315917f872..ec39a78b71 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneId.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneId.psql @@ -13,7 +13,7 @@ create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureTmp as ) ; -create index GeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id) +create index :ORG_ABBREV_GeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp (na_feature_id) ; @@ -261,7 +261,7 @@ create index GeneFeatureTmp_na_feature_id ON :SCHEMA.:ORG_ABBREVGeneFeatureTmp - CREATE UNIQUE INDEX gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id) + CREATE UNIQUE INDEX :ORG_ABBREV_gix_pk ON :SCHEMA.:ORG_ABBREVOneGeneIdsTmp (lower_id) ; diff --git 
a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql index 1f42b544b4..9cf983cd01 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql @@ -29,7 +29,7 @@ - CREATE UNIQUE INDEX annottmpnew_pk_ix ON annotgij (na_sequence_id,start_min,end_max,is_reversed,feature_type) + CREATE UNIQUE INDEX :ORG_ABBREV_annottmpnew_pk_ix ON annotgij (na_sequence_id,start_min,end_max,is_reversed,feature_type) ; @@ -50,7 +50,7 @@ - CREATE UNIQUE INDEX gnattidloc_pk_ix ON GeneIdLocGIJ (na_sequence_id,start_min,is_reversed,end_max,na_feature_id,source_id,total_expression) + CREATE UNIQUE INDEX :ORG_ABBREV_gnattidloc_pk_ix ON GeneIdLocGIJ (na_sequence_id,start_min,is_reversed,end_max,na_feature_id,source_id,total_expression) ; @@ -82,7 +82,7 @@ - create index gnidloc_nafid_ix on GeneIdLocGIJ (na_feature_id) + create index :ORG_ABBREV_gnidloc_nafid_ix on GeneIdLocGIJ (na_feature_id) ; @@ -183,7 +183,7 @@ - create index gijtmp_gnscid_ix on gijtmp (gene_source_id) + create index :ORG_ABBREV_gijtmp_gnscid_ix on gijtmp (gene_source_id) ; diff --git a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql index edf94bd30e..14be4666d1 100644 --- a/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/ProteinAttributes.psql @@ -80,7 +80,7 @@ - create index ProteinGoAttr_aaSequenceId ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id) + create index ProteinGoAttr_aaSequenceId_:ORG_ABBREV ON :SCHEMA.:ORG_ABBREVProteinGoAttributes_tmp (aa_sequence_id) ; From b6e139b8c21102e691b5d7fc8f892e8d4d4cbc46 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Mon, 2 Jun 2025 19:51:05 -0400 Subject: [PATCH 095/112] remove ortho .psql --- .../webready/global/GroupDomainAttribute.psql | 24 ----- .../global/GroupDomainAttribute_ix.psql | 2 
- .../webready/global/SequenceAttributes.psql | 100 ------------------ .../global/SequenceAttributes_ix.psql | 31 ------ 4 files changed, 157 deletions(-) delete mode 100644 Model/lib/psql/webready/global/GroupDomainAttribute.psql delete mode 100644 Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql delete mode 100644 Model/lib/psql/webready/global/SequenceAttributes.psql delete mode 100644 Model/lib/psql/webready/global/SequenceAttributes_ix.psql diff --git a/Model/lib/psql/webready/global/GroupDomainAttribute.psql b/Model/lib/psql/webready/global/GroupDomainAttribute.psql deleted file mode 100644 index e9f869535b..0000000000 --- a/Model/lib/psql/webready/global/GroupDomainAttribute.psql +++ /dev/null @@ -1,24 +0,0 @@ -CREATE TABLE SCHEMA.GroupDomainAttribute AS -( -SELECT og.group_id AS group_name, ag.descriptions -FROM apidb.OrthologGroup og, - (SELECT group_name, - STRING_AGG(accession ||' (' || num_proteins|| ')', ', ') AS descriptions - FROM (SELECT group_name, accession, num_proteins, rnk - FROM (SELECT group_name, accession, num_proteins, - rank() OVER (PARTITION BY group_name ORDER BY num_proteins DESC) rnk - FROM (SELECT group_name, accession, count(distinct full_id) AS num_proteins - FROM SCHEMA.DomainAssignment - GROUP BY group_name,accession - ) - ) - WHERE rnk <= 3 - ) - GROUP BY group_name - ORDER BY 1 - ) ag -WHERE og.group_id = ag.group_name -) -; - - diff --git a/Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql b/Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql deleted file mode 100644 index b796bb25e4..0000000000 --- a/Model/lib/psql/webready/global/GroupDomainAttribute_ix.psql +++ /dev/null @@ -1,2 +0,0 @@ -CREATE INDEX GroupDomainAttribute_idx ON :SCHEMA.GroupDomainAttribute (group_name) - ; diff --git a/Model/lib/psql/webready/global/SequenceAttributes.psql b/Model/lib/psql/webready/global/SequenceAttributes.psql deleted file mode 100644 index 77e8aedba8..0000000000 --- 
a/Model/lib/psql/webready/global/SequenceAttributes.psql +++ /dev/null @@ -1,100 +0,0 @@ - - - - create table SequenceAttributes as - SELECT - aas.source_id AS full_id, - aas.source_id, - aas.aa_sequence_id, - length(aas.sequence) as length, - aas.description AS product, - aas.taxon_id, - - taxon.orthomcl_taxon_id, - taxon.taxon_group, - taxon.orthomcl_abbrev AS taxon_abbreviation, - taxon.name AS organism_name, - taxon.core_peripheral, - - o.group_id AS group_name, - o.ortholog_group_id, - o.number_of_members AS group_size, - o.number_of_core_members, - o.number_of_peripheral_members, - CASE is_residual WHEN 1 THEN 'Residual' - ELSE 'Core' END AS group_type, - urls.source_url, - urls.source_text - FROM - dots.AASequence aas, - apidb.orthologGroup o, - apidb.orthologGroupAASequence ogseq, - ( - SELECT o.orthomcl_abbrev, - o.taxon_id as orthomcl_taxon_id, - t.name, - t.core_peripheral, - t.taxon_group - FROM apidb.organism o, - (WITH RECURSIVE TaxonHierarchy AS ( - SELECT - three_letter_abbrev, - orthomcl_clade_id, - name, - core_peripheral, - name AS taxon_group, - parent_id - FROM apidb.OrthomclClade - WHERE name IN ('Archaea', 'Bacteria', 'Alveolates', 'Amoebozoa', 'Euglenozoa', - 'Fungi', 'Metazoa', 'Other Eukaryota', 'Viridiplantae') - UNION ALL - SELECT - child.three_letter_abbrev, - child.orthomcl_clade_id, - child.name, - child.core_peripheral, - parent.taxon_group, - child.parent_id - FROM apidb.OrthomclClade child - JOIN TaxonHierarchy parent ON child.parent_id = parent.orthomcl_clade_id - ) - SELECT three_letter_abbrev, taxon_group, name, core_peripheral - FROM TaxonHierarchy - WHERE core_peripheral IN ('C', 'P') - ) t - WHERE t.three_letter_abbrev = o.orthomcl_abbrev - ) taxon, - - ( - SELECT aas.aa_sequence_id, - CASE - WHEN ores.resource_name IN ('AmoebaDB','CryptoDB','FungiDB','GiardiaDB','HostDB','MicrosporidiaDB', - 'PlasmoDB','PiroplasmaDB','ToxoDB','TrichDB','TriTrypDB','VectorBase') - THEN SUBSTR(ores.resource_url, 0, strpos(ores.resource_url, 
'/downloads')) || 'record/gene/' - || aas.source_id - WHEN ores.resource_name = 'Uniprot' - THEN SUBSTR(ores.resource_url, 0, strpos(ores.resource_url, '/proteomes') ) || 'uniprot/' - || aas.source_id - ELSE '' END AS source_url, - CASE WHEN ores.resource_name IS NULL THEN '' - ELSE aas.source_id || ' (' || ores.resource_name || ')' END AS source_text - FROM dots.AaSequence aas, - apidb.organism ot, - apidb.orthomclresource ores - WHERE ot.taxon_id = ores.orthomcl_taxon_id - AND ot.taxon_id = aas.taxon_id) urls - WHERE aas.aa_sequence_id = ogseq.aa_sequence_id - AND ogseq.group_id = o.group_id - AND aas.aa_sequence_id = urls.aa_sequence_id - AND taxon.orthomcl_taxon_id = aas.taxon_id - AND aas.taxon_id in (select distinct(eas.taxon_id) from apidb.organism og, dots.aasequence eas where eas.taxon_id = og.taxon_id) - - ; - - - - alter table :SCHEMA.SequenceAttributes - add constraint SeqAttrs_pk primary key (full_id) - - ; - diff --git a/Model/lib/psql/webready/global/SequenceAttributes_ix.psql b/Model/lib/psql/webready/global/SequenceAttributes_ix.psql deleted file mode 100644 index 88e66acdde..0000000000 --- a/Model/lib/psql/webready/global/SequenceAttributes_ix.psql +++ /dev/null @@ -1,31 +0,0 @@ - - - create unique index SeqAttrs_idx ON :SCHEMA.SequenceAttributes (full_id, group_name, taxon_id, source_id) - - ; - - - - create unique index SeqAttrs_gusIdx ON :SCHEMA.SequenceAttributes (ortholog_group_id, aa_sequence_id) - - ; - - - - create unique index SeqAttrs_idx2 ON :SCHEMA.SequenceAttributes (group_name, length desc, full_id, taxon_id) - - ; - - - - create unique index SeqAttrs_idx3 - on :SCHEMA.SequenceAttributes (aa_sequence_id, group_name, ortholog_group_id, orthomcl_taxon_id, taxon_id) - - ; - - - - create unique index SeqAttrs_idx4 ON :SCHEMA.SequenceAttributes (source_id, full_id, group_name, taxon_id) - - ; - From 223b6609e0e123aa075ec449522f08908d9cc14a Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Tue, 3 Jun 2025 15:50:57 -0400 Subject: [PATCH 
096/112] need to define taxon tree org specific to handle temp taxon ids --- .../psql/webready/orgSpecific/Taxonomy.psql | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql index e0c1b7a037..45422830bc 100644 --- a/Model/lib/psql/webready/orgSpecific/Taxonomy.psql +++ b/Model/lib/psql/webready/orgSpecific/Taxonomy.psql @@ -1,11 +1,23 @@ +DROP TABLE IF EXISTS :SCHEMA.TaxonTree_:ORG_ABBREV; + +CREATE TABLE :SCHEMA.TaxonTree_:ORG_ABBREV as + SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id, + cast(tn.name as varchar(80)) as name, + cast(t.rank as varchar(24)) as rank + FROM sres.Taxon t, sres.TaxonName tn + WHERE t.taxon_id = tn.taxon_id + AND tn.name_class = 'scientific name' + AND t.taxon_id = :TAXON_ID + ; + :CREATE_AND_POPULATE WITH RECURSIVE cte AS ( SELECT tt.*, name as organism, ARRAY[taxon_id::numeric] as path - FROM :SCHEMA.taxontree tt + FROM :SCHEMA.TaxonTree_:ORG_ABBREV tt WHERE taxon_id = :TAXON_ID UNION SELECT tt.*, cte.organism, cte.path || tt.taxon_id as path - FROM :SCHEMA.taxontree tt, cte + FROM :SCHEMA.TaxonTree_:ORG_ABBREV tt, cte WHERE cte.parent_id = tt.taxon_id AND tt.name != 'root' ) @@ -13,3 +25,6 @@ ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date FROM (SELECT cte.* FROM cte ORDER BY path) t :DECLARE_PARTITION; + + +DROP TABLE :SCHEMA.TaxonTree_:ORG_ABBREV; From 9083af198f65e0274803d2ad70515e67ccebd277 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 3 Jun 2025 20:35:06 -0400 Subject: [PATCH 097/112] remove taxontree --- Model/lib/psql/webready/global/TaxonTree.psql | 7 ------- Model/lib/psql/webready/global/TaxonTree_ix.psql | 2 -- 2 files changed, 9 deletions(-) delete mode 100644 Model/lib/psql/webready/global/TaxonTree.psql delete mode 100644 Model/lib/psql/webready/global/TaxonTree_ix.psql diff --git a/Model/lib/psql/webready/global/TaxonTree.psql 
b/Model/lib/psql/webready/global/TaxonTree.psql deleted file mode 100644 index d09a10167a..0000000000 --- a/Model/lib/psql/webready/global/TaxonTree.psql +++ /dev/null @@ -1,7 +0,0 @@ - CREATE TABLE :SCHEMA.TaxonTree as - SELECT t.taxon_id, t.parent_id, t.ncbi_tax_id, - cast(tn.name as varchar(80)) as name, - cast(t.rank as varchar(24)) as rank - FROM sres.Taxon t, sres.TaxonName tn - WHERE t.taxon_id = tn.taxon_id - AND tn.name_class = 'scientific name' diff --git a/Model/lib/psql/webready/global/TaxonTree_ix.psql b/Model/lib/psql/webready/global/TaxonTree_ix.psql deleted file mode 100644 index 4e31478a22..0000000000 --- a/Model/lib/psql/webready/global/TaxonTree_ix.psql +++ /dev/null @@ -1,2 +0,0 @@ - CREATE INDEX taxontree_idx ON :SCHEMA.taxontree (taxon_id, name) - ; From 9124a0db8266a415a120daf0e8891e4830fca7bc Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Tue, 3 Jun 2025 20:48:07 -0400 Subject: [PATCH 098/112] fix taxonspecies --- .../webready/orgSpecific/TaxonSpecies.psql | 28 ++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql b/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql index b7d15cbbdc..22bf73943a 100644 --- a/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql +++ b/Model/lib/psql/webready/orgSpecific/TaxonSpecies.psql @@ -1,10 +1,29 @@ +drop table if exists :SCHEMA.taxonOfInterest_:ORG_ABBREV; + +create unlogged table :SCHEMA.taxonOfInterest_:ORG_ABBREV as +select distinct ens.taxon_id +from dots.est e,apidb.datasource ds, apidb.organism o, + sres.ExternalDatabaseRelease edr, dots.ExternalNaSequence ens, + sres.externaldatabase ed, sres.ontologyterm oterm +WhERE e.na_sequence_id = ens.na_sequence_id + AND ens.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + AND ens.sequence_ontology_id = oterm.ontology_term_id + AND ed.name = ds.name + and ds.taxon_id = o.taxon_id + and 
o.is_reference_strain = 1 + and o.taxon_id = :TAXON_ID + union + select :TAXON_ID +; + -- recursively walk taxon tree to find ancestor with rank "species" -- Update this to select max/min level with rank species if there are multiple :CREATE_AND_POPULATE WITH RECURSIVE cte AS ( SELECT TAXON_ID, taxon_id as parent_id, 1 as lvl FROM sres.taxon - WHERE taxon_id = :TAXON_ID + WHERE taxon_id IN (SELECT taxon_id from :SCHEMA.taxonOfInterest_:ORG_ABBREV) UNION ALL SELECT cte.taxon_id, sub.parent_id, lvl + 1 FROM cte, sres.taxon sub @@ -16,8 +35,9 @@ current_timestamp as modification_date FROM cte c, sres.taxon t WHERE t.taxon_id = c.parent_id - AND t.rank='species' - - + AND t.rank='species' :DECLARE_PARTITION; +drop table if exists :SCHEMA.taxonOfInterest_:ORG_ABBREV; + + From 590b6b723a024de8b75796280332b5b8a333a95e Mon Sep 17 00:00:00 2001 From: John Brestelli Date: Wed, 4 Jun 2025 15:39:04 -0400 Subject: [PATCH 099/112] add notes --- Model/lib/xml/tuningManager/webtables.org | 234 +++++++++++++--------- 1 file changed, 135 insertions(+), 99 deletions(-) diff --git a/Model/lib/xml/tuningManager/webtables.org b/Model/lib/xml/tuningManager/webtables.org index d622483647..2a4be5495e 100644 --- a/Model/lib/xml/tuningManager/webtables.org +++ b/Model/lib/xml/tuningManager/webtables.org @@ -3,140 +3,176 @@ * MO Tables - Organism - - [X] OrganismAbbreviationBlast_ix.psql - - [X] OrganismAbbreviationBlast.psql + - [X] OrganismAbbreviationBlast_ix + - [X] OrganismAbbreviationBlast - move to KEEP - - [X] OrganismAbbreviation_ix.psql - - [X] OrganismAbbreviation.psql + - [X] OrganismAbbreviation_ix + - [X] OrganismAbbreviation - updated the abbreviation field to name_for_filenames - - [X] OrganismSelectTaxonRank_ix.psql - - [X] OrganismSelectTaxonRank.psql - - [X] Taxonomy_ix.psql - - [X] Taxonomy.psql - - [X] TaxonSpecies_ix.psql - - [X] TaxonSpecies.psql + - [X] OrganismSelectTaxonRank_ix + - [X] OrganismSelectTaxonRank + - [X] Taxonomy_ix + - [X] Taxonomy + - [X] 
TaxonSpecies_ix + - [X] TaxonSpecies - Genomic Sequence - - [X] GenomicSequenceId_ix.psql - - [X] GenomicSequenceId.psql - - [X] GenomicSequenceSequence_ix.psql - - [X] GenomicSequenceSequence.psql + - [X] GenomicSequenceId_ix + - [X] GenomicSequenceId + - [X] GenomicSequenceSequence_ix + - [X] GenomicSequenceSequence - [X] SequencePieceClosure - [X] GenomicSeqAttributes - [s] SequenceEnzymeClass - Temp remove this and eventually Move to ComparativeGenomics because it depends on the OrthoMCL Derived EC Numbers - Transcript / Protein - - [X] SignalPeptideDomains_ix.psql - - [X] SignalPeptideDomains.psql - - [X] TransmembraneDomains_ix.psql - - [X] TransmembraneDomains.psql - - [X] PdbSimilarity_ix.psql - - [X] PdbSimilarity.psql - - [X] ProteinSequence_ix.psql - - [X] ProteinSequence.psql - - [X] ProteinAttributes_ix.psql - - [X] ProteinAttributes.psql - - [ ] TranscriptAttributes_ix.psql - - [ ] TranscriptAttributes.psql + - [X] SignalPeptideDomains_ix + - [X] SignalPeptideDomains + - [X] TransmembraneDomains_ix + - [X] TransmembraneDomains + - [X] PdbSimilarity_ix + - [X] PdbSimilarity + - [X] ProteinSequence_ix + - [X] ProteinSequence + - [ ] ProteinAttributes_ix + - [ ] Remove Derived EC Numbers + - [ ] New Tuning table(s) for EC Derived ECs per protein (gene and transcript) + - [X] ProteinAttributes + - [ ] TranscriptAttributes_ix + - [ ] TranscriptAttributes - no longer has products column because that is done by TM - - [X] CodingSequence_ix.psql - - [X] CodingSequence.psql - - [X] IntronUtrCoords_ix.psql - - [X] IntronUtrCoords.psql - - [X] TranscriptCenDistance_ix.psql - - [X] TranscriptCenDistance.psql - - [ ] TranscriptPathway_ix.psql - - [ ] TranscriptPathway.psql + - no longer has derived ec numbers (move to comparative genomics) + - [X] CodingSequence_ix + - [X] CodingSequence + - [X] IntronUtrCoords_ix + - [X] IntronUtrCoords + - [X] TranscriptCenDistance_ix + - [X] TranscriptCenDistance + - [ ] TranscriptPathway_ix + - [ ] TranscriptPathway - This may 
need to move to comparative genomics because we need the OrthoDerived EC mappings - - [X] TranscriptSequence_ix.psql - - [X] TranscriptSequence.psql - - [X] ChIPchipTranscript_ix.psql - - [X] ChIPchipTranscript.psql + - [X] TranscriptSequence_ix + - [X] TranscriptSequence + - [X] ChIPchipTranscript_ix + - [X] ChIPchipTranscript - Gene - - [X] GeneId_ix.psql - - [X] GeneId.psql - - [X] GeneAttributes_ix.psql - - [X] GeneAttributes.psql - - fix gene_product - - [X] GeneCopyNumbers_ix.psql - - [X] GeneCopyNumbers.psql - - [X] GeneGoTable_ix.psql - - [X] GeneGoTable.psql - - [X] GeneGoTerms_ix.psql - - [X] GeneGoTerms.psql - - [X] GeneLocations_ix.psql - - [X] GeneLocations.psql - - [X] GeneModelDump_ix.psql - - [X] GeneModelDump.psql - - [X] GeneSummaryFilter_ix.psql - - [X] GeneSummaryFilter.psql - - [X] TFBSGene_ix.psql - - [X] TFBSGene.psql + - [X] GeneId_ix + - [X] GeneId + - [X] GeneAttributes_ix + - [X] GeneAttributes + - remove gene_product and remove orthomclname + - [X] GeneCopyNumbers_ix + - [X] GeneCopyNumbers + - [X] GeneGoTable_ix + - [X] GeneGoTable + - [X] GeneGoTerms_ix + - [X] GeneGoTerms + - [X] GeneLocations_ix + - [X] GeneLocations + - [X] GeneModelDump_ix + - [X] GeneModelDump + - [X] GeneSummaryFilter_ix + - [X] GeneSummaryFilter + - [X] TFBSGene_ix + - [X] TFBSGene - removed aef.* - - [ ] PathwayNodeGene_ix.psql + - [ ] PathwayNodeGene_ix - This may need to move to comparative genomics because we need the OrthoDerived EC mappings - - [ ] PathwayNodeGene.psql + - [ ] PathwayNodeGene - This may need to move to comparative genomics because we need the OrthoDerived EC mappings - - [ ] PathwaysGeneTable_ix.psql + - [ ] PathwaysGeneTable_ix - This may need to move to comparative genomics because we need the OrthoDerived EC mappings - - [ ] PathwaysGeneTable.psql + - [ ] PathwaysGeneTable - This may need to move to comparative genomics because we need the OrthoDerived EC mappings - - [X] GoTermSummary_ix.psql - - [X] GoTermSummary.psql - - [X] 
EqtlSpan_ix.psql - - [X] EqtlSpan.psql + - [X] GoTermSummary_ix + - [X] GoTermSummary + - [X] EqtlSpan_ix + - [X] EqtlSpan - EST - - [ ] EstAttributes_ix.psql + - [X] EstAttributes_ix - move to comparative - join to apidb.organism and filter by "is_reference_strain" - - [ ] EstAttributes.psql + - [X] EstAttributes - move to comparative - join to apidb.organism and filter by "is_reference_strain" - - [ ] EstSequence_ix.psql + - [X] EstSequence_ix - move to comparative - join to apidb.organism and filter by "is_reference_strain" - - [ ] EstSequence.psql + - [X] EstSequence - move to comparative - join to apidb.organism and filter by "is_reference_strain" - - [X] EstAlignmentGeneSummary_ix.psql - - [X] EstAlignmentGeneSummary.psql + - [X] EstAlignmentGeneSummary_ix + - [X] EstAlignmentGeneSummary - Dataset / Other - - [X] DatasetExampleSourceId_ix.psql - - [X] DatasetExampleSourceId.psql + - [X] DatasetExampleSourceId_ix + - [X] DatasetExampleSourceId - NOTE: this depends on Profiles - - [X] PANExtDBRls.psql - - [X] PANIO.psql - - [X] PANIO_ix.psql + - [X] PANExtDBRls + - [X] PANIO + - [X] PANIO_ix - - [ ] +ProfileType_ix.psql+ - - [ ] +ProfileType.psql+ - - [ ] +Profile_ix.psql+ - - [ ] +Profile.psql+ - - [ ] +ProfileSamples_ix.psql+ - - [ ] +ProfileSamples.psql+ + - [ ] +ProfileType_ix+ + - [ ] +ProfileType+ + - [ ] +Profile_ix+ + - [ ] +Profile+ + - [ ] +ProfileSamples_ix+ + - [ ] +ProfileSamples+ - - [X] RnaSeqStats_ix.psql - - [X] RnaSeqStats.psql - - [X] OrganismAttributes_ix.psql - - [X] OrganismAttributes.psql + - [X] RnaSeqStats_ix + - [X] RnaSeqStats + - [X] OrganismAttributes_ix + - [X] OrganismAttributes - removed ESTs and SNPs - - [X] ChrCopyNumbers_ix.psql - - [X] ChrCopyNumbers.psql + - [X] ChrCopyNumbers_ix + - [X] ChrCopyNumbers - Junctions (Kathryn) - - [ ] IntronSupportLevel_ix.psql - - [ ] IntronSupportLevel.psql - - [ ] GeneIntJuncStats_ix.psql - - [ ] GeneIntJuncStats.psql - - [ ] GeneIntronJunction_ix.psql - - [ ] GeneIntronJunction.psql - - [ ] 
NameMappingGIJ_ix.psql - - [ ] NameMappingGIJ.psql - - needs to be in TM, depends on dataset presenters - - [ ] GeneMaxIntronGIJ_ix.psql + - [ ] IntronSupportLevel_ix + - [ ] IntronSupportLevel + - [ ] GeneIntJuncStats_ix + - [ ] GeneIntJuncStats + - [ ] GeneIntronJunction_ix + - [ ] GeneIntronJunction + - [ ] NameMappingGIJ_ix + - [ ] NameMappingGIJ + - needs to be in TM, depends on dataset presenters + - [ ] GeneMaxIntronGIJ_ix - should be aux table and dropped - - [ ] GeneMaxIntronGIJ.psql + - [ ] GeneMaxIntronGIJ - should be aux table and dropped + +- Comparative + - [ ] ProteinGroup (RENAMED from SEQUENCEATTRIBUTES) + - add orthomcl derived ec numbers here + - add gene_id + - add transcript_id + - ENSURE this has a row for every protein! + - [ ] ProteinGroupDomainAssignment (RENAMED from DOMAINASSIGNMENT) + + - [ ] GroupDomainDescriptions (RENAMED from GROUPDOMAINATTRIBUTE) + - [ ] proteinGroupEnzymeClass (renamed from sequenceenzymeclass) + - [ ] AlphaFoldGenes + - [ ] GroupPhylogeneticProfile + - [ ] OrthologousTranscripts + - [ ] PhyleticPattern + - no longer uses dots.sequencegroup and dots.sequencesequencegroup + +- Global + - [X] CompoundAttributes + - [X] CompoundId + - [X] CompoundProperties + - [ ] CompoundTypeAheads + - double check again after database is rebuilt. may be ok + - [X] OntologyLevels + - [X] PathwayAttributes + - [X] PathwayCompounds + - [X] PathwayNodes + - [X] PathwayReactions + +- KEEP Tuning table + - GeneProduct (CHECK) + - TranscriptProduct (NEW) + - ProteinProduct (NEW??)
From 10002ed8d93a1ef7b113994953e105fea3e1a315 Mon Sep 17 00:00:00 2001 From: Richard Demko Date: Thu, 5 Jun 2025 11:00:55 -0400 Subject: [PATCH 100/112] New GeneOrthologGroup and TranscriptOrthologGroupTables --- .../comparative/GeneOrthologGroup.psql | 10 ++++++++++ .../comparative/TranscriptOrthologGroup.psql | 10 ++++++++++ .../wdk/model/questions/params/geneParams.xml | 18 +++++++++--------- 3 files changed, 29 insertions(+), 9 deletions(-) create mode 100644 Model/lib/psql/webready/comparative/GeneOrthologGroup.psql create mode 100644 Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql new file mode 100644 index 0000000000..4c30361e4d --- /dev/null +++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql @@ -0,0 +1,10 @@ + create table :SCHEMA.GeneOrthologGroup as /* gene_id -> ortholog group_id, joined through the protein aa_sequence_id; NOTE(review): the primary key on gene_id below assumes this join yields one row per gene - confirm */ + SELECT pa.gene_source_id AS gene_id, + ogas.group_id + FROM webready.proteinattributes pa, + apidb.orthologgroupaasequence ogas + WHERE pa.aa_sequence_id = ogas.aa_sequence_id +; + alter table :SCHEMA.GeneOrthologGroup + add constraint GeneOrthologGroup_pk primary key (gene_id) +; diff --git a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql new file mode 100644 index 0000000000..5362c8465e --- /dev/null +++ b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql @@ -0,0 +1,10 @@ + create table :SCHEMA.TranscriptOrthologGroup as /* transcript source_id -> ortholog group_id, joined through aa_sequence_id */ + SELECT ta.source_id AS source_id, + ogas.group_id + FROM webready.transcriptattributes ta, + apidb.orthologgroupaasequence ogas + WHERE ta.aa_sequence_id = ogas.aa_sequence_id +; + alter table :SCHEMA.TranscriptOrthologGroup + add constraint TranscriptOrthologGroup_pk primary key (source_id) +; diff --git a/Model/lib/wdk/model/questions/params/geneParams.xml b/Model/lib/wdk/model/questions/params/geneParams.xml index
142a6d1b63..4635e82211 100644 --- a/Model/lib/wdk/model/questions/params/geneParams.xml +++ b/Model/lib/wdk/model/questions/params/geneParams.xml @@ -8274,16 +8274,16 @@ products of your selected type (or types).


SELECT three_letter_abbrev as term, name as internal, name as display - FROM apidb.orthomcltaxon + FROM apidb.orthomclclade WHERE three_letter_abbrev not in ('BACI') ORDER BY depth_first_index ASC, three_letter_abbrev ASC From 49e87f694113c5b7a6ae4e4cb3fc5a948616822f Mon Sep 17 00:00:00 2001 From: bindu Date: Wed, 14 May 2025 13:58:31 -0400 Subject: [PATCH 101/112] uncomment TranscriptGenomicSequence --- Model/lib/xml/tuningManager/apiTuningManager.xml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml index 6a783fcb6d..bd5806a592 100644 --- a/Model/lib/xml/tuningManager/apiTuningManager.xml +++ b/Model/lib/xml/tuningManager/apiTuningManager.xml @@ -2582,11 +2582,10 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id)
- The genomic sequence of each transcript. Used in the transcript record / gene record page. - - COMMENTING OUT DEPENDENCIES FOR NOW @@ -2712,7 +2711,7 @@ sub readClob { ---> + From 8fc80129852b1b3f44fec5ad4c5b7f41fc17cff0 Mon Sep 17 00:00:00 2001 From: Richard Demko Date: Thu, 5 Jun 2025 11:00:55 -0400 Subject: [PATCH 102/112] New GeneOrthologGroup and TranscriptOrthologGroupTables --- .../comparative/GeneOrthologGroup.psql | 10 ++++++++++ .../comparative/TranscriptOrthologGroup.psql | 10 ++++++++++ .../wdk/model/questions/params/geneParams.xml | 18 +++++++++--------- 3 files changed, 29 insertions(+), 9 deletions(-) create mode 100644 Model/lib/psql/webready/comparative/GeneOrthologGroup.psql create mode 100644 Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql new file mode 100644 index 0000000000..4c30361e4d --- /dev/null +++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql @@ -0,0 +1,10 @@ + create table :SCHEMA.GeneOrthologGroup as /* gene_id -> ortholog group_id, joined through the protein aa_sequence_id; NOTE(review): the primary key on gene_id below assumes this join yields one row per gene - confirm */ + SELECT pa.gene_source_id AS gene_id, + ogas.group_id + FROM webready.proteinattributes pa, + apidb.orthologgroupaasequence ogas + WHERE pa.aa_sequence_id = ogas.aa_sequence_id +; + alter table :SCHEMA.GeneOrthologGroup + add constraint GeneOrthologGroup_pk primary key (gene_id) +; diff --git a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql new file mode 100644 index 0000000000..5362c8465e --- /dev/null +++ b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql @@ -0,0 +1,10 @@ + create table :SCHEMA.TranscriptOrthologGroup as /* transcript source_id -> ortholog group_id, joined through aa_sequence_id */ + SELECT ta.source_id AS source_id, + ogas.group_id + FROM webready.transcriptattributes ta, + apidb.orthologgroupaasequence ogas + WHERE ta.aa_sequence_id = ogas.aa_sequence_id +; + alter table :SCHEMA.TranscriptOrthologGroup + add constraint
TranscriptOrthologGroup_pk primary key (source_id) +; diff --git a/Model/lib/wdk/model/questions/params/geneParams.xml b/Model/lib/wdk/model/questions/params/geneParams.xml index 142a6d1b63..4635e82211 100644 --- a/Model/lib/wdk/model/questions/params/geneParams.xml +++ b/Model/lib/wdk/model/questions/params/geneParams.xml @@ -8274,16 +8274,16 @@ products of your selected type (or types).


SELECT three_letter_abbrev as term, name as internal, name as display - FROM apidb.orthomcltaxon + FROM apidb.orthomclclade WHERE three_letter_abbrev not in ('BACI') ORDER BY depth_first_index ASC, three_letter_abbrev ASC From 9d44208351a3362b09b40890dc01e3e9f249c0eb Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 5 Jun 2025 12:17:41 -0400 Subject: [PATCH 103/112] add convert2webready --- convert2webready | 82 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100755 convert2webready diff --git a/convert2webready b/convert2webready new file mode 100755 index 0000000000..d705233c9d --- /dev/null +++ b/convert2webready @@ -0,0 +1,82 @@ +#!/usr/bin/perl + +# grep -irl apidbtuning | ~/sourceCode/website/ApiCommonModel/convert2wr ~/sourceCode/website/ApiCommonModel/Model ~/sourceCode/ApiCommonModel/Model + +use strict; +my @tables = ( +'CompoundAttributes', +'CompoundId', +'CompoundProperties', +'CompoundTypeAheads', +'OntologyLevels', +'PathwayAttributes', +'PathwayCompounds', +'PathwayNodes', +'PathwayReactions', +'ChIPchipTranscript', +'ChrCopyNumbers', +'CodingSequence', +'EqtlSpan', +'EstAlignmentGeneSummary', +'EstAttributes', +'EstSequence', +'GeneAttributes', +'GeneCopyNumbers', +'GeneGoTable', +'GeneGoTerms', +'GeneId', +#'GeneIntJuncStats', +#'GeneIntronJunction', +'GeneLocations', +#'GeneMaxIntronGIJ', +'GeneModelDump', +'GeneSummaryFilter', +'GenomicSeqAttributes', +'GenomicSequenceId', +'GenomicSequenceSequence', +'GoTermSummary', +#'IntronSupportLevel', +#'IntronUtrCoords', +'OrganismAbbreviation', +'OrganismSelectTaxonRank', +'PANExtDbRls', +'PANIO', +'PathwayNodeGene', +'PathwaysGeneTable', +'PdbSimilarity', +'ProteinAttributes', +'ProteinSequence', +'RnaSeqStats', +'SequencePieceClosure', +'SignalPeptideDomains', +'Taxonomy', +'TaxonSpecies', +'TFBSGene', +'TranscriptAttributes', +'TranscriptCenDistance', +'TranscriptPathway', +'TranscriptSequence', +'TransmembraneDomains' +); + +my 
($sourceModelDir, $targetModelDir) = @ARGV; + +while(<STDIN>) { # one file name (relative to the model dir) per line on STDIN + chomp; + my $filenm = $_; + print STDERR "processing $filenm\n"; + my $filetext = do { # slurp the whole source file + local $/ = undef; + open my $fh, "<", "$sourceModelDir/$filenm" + or die "could not open '$sourceModelDir/$filenm': $!"; + <$fh>; + }; + + foreach my $table (@tables) { # rewrite schema prefix for each listed table (case-insensitive) + $filetext =~ s/apidbtuning\.\Q$table\E/webready.$table/gi; + } + + open(FH, '>', "$targetModelDir/$filenm") or die $!; + print FH $filetext; + close FH or die "could not write '$targetModelDir/$filenm': $!"; +} From 2ee6f721b062a8d1cdc29aab109ef2754b2d39a0 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Thu, 5 Jun 2025 19:42:10 -0400 Subject: [PATCH 104/112] add TranscriptProduct --- .../xml/tuningManager/apiTuningManager.xml | 83 +++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml index 1ce105cbfe..2dd94f6212 100644 --- a/Model/lib/xml/tuningManager/apiTuningManager.xml +++ b/Model/lib/xml/tuningManager/apiTuningManager.xml @@ -3,6 +3,89 @@ + + + + + + + + + + + + + + Map each GO term that is assigned to at least one gene to a GoSubset term From 6e5deac4bd84b4f157b841436b348d33cde17b62 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Fri, 6 Jun 2025 10:47:57 -0400 Subject: [PATCH 105/112] restore apiTuningManager.xml --- .../tuningManager/apiTuningManager-pruned.xml | 3051 ++++++ .../xml/tuningManager/apiTuningManager.xml | 9516 ++++++++++++++--- 2 files changed, 11025 insertions(+), 1542 deletions(-) create mode 100644 Model/lib/xml/tuningManager/apiTuningManager-pruned.xml diff --git a/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml b/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml new file mode 100644 index 0000000000..2dd94f6212 --- /dev/null +++ b/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml @@ -0,0 +1,3051 @@ + + + + + + + + + + + + + + + + + + + + + + Map each GO term that is assigned to at least one gene to a GoSubset term + that is either itself or
an immediate ancestor. By "immediate ancestor" + we mean an ancestor such that there isn't an intermediate ancestor also + in the subset. (Note that there can be multiple links as long as none is + in the subset.) This is currently restricted to 'goslim_generic', solely + by the condition in the SUBSET_TERM subquery. + + + + + + + + + + + + + + + Each row maps a dataset onto an ID for which the dataset contains data; + each dataset gets one such row. + Used in dataset record queries. + + + + + + + + + + + + Stores per-organism information. Used by the organism record, as well + as by project_id(), the function that maps an organism to a project. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 then 1 else 0 end as hasCentromere + FROM DOTS.MISCELLANEOUS f + , sres.ontologyTerm ot + , dots.nasequence s + WHERE ot.ontology_term_id = f.sequence_ontology_id + AND ot.name='centromere' + AND f.na_sequence_id = s.na_sequence_id + GROUP BY s.taxon_id + ]]> + + + + + + + + + + + + = g.start_min + AND g.na_sequence_id = seq.na_sequence_id + AND t.name = 'ExternalNASequence' + ) gene + ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id) + GROUP BY sim.taxon_id + ]]> + + + = 100 + AND s.is_best_alignment in (1) + AND s.percent_est_bases_aligned >= 20 + AND s.percent_identity >= 90 + AND e.best_alignment_count <= 1 + AND e.source_id = s.accession + GROUP by s.gene HAVING count(*) >= 1 + ) est ON ga.source_id = est.source_id + RIGHT OUTER JOIN ( + SELECT project_id, taxon_id, + max(database_version) as database_version, + CASE WHEN ncbi_tax_id > 9000000000 THEN NULL + ELSE ncbi_tax_id + END ncbi_tax_id, + to_char(sum(length)/1000000,'9999.99') as megabps + FROM GenomicSeqAttributes + WHERE is_top_level = 1 + GROUP BY project_ID, taxon_id, ncbi_tax_id + ) genomestat ON genomestat.taxon_id = ga.taxon_id + LEFT OUTER JOIN ( + SELECT count(distinct ga.source_id) as ct, ga.taxon_id + FROM GeneAttributes ga, SnpAttributes sf + WHERE 
sf.gene_source_id = ga.source_id + AND ga.is_deprecated = 0 + GROUP BY ga.taxon_id + ) snpCount ON ga.taxon_id = snpCount.taxon_id + GROUP BY genomestat.taxon_id, + genomestat.project_id, + genomestat.database_version, + genomestat.ncbi_tax_id, + genomestat.Megabps, + snpCount.ct + ]]> + + + 10000000 + -- then 'TMPTX_' || round(t.ncbi_tax_id / 10000000) || '_' || + -- mod(t.ncbi_tax_id, 10000000) -- e.g. "TMPTX_930_1" + -- then 'TMPTX_' || t.ncbi_tax_id -- all the many digits + then 'TMPTX_' || o.public_abbrev + else 'NCBITAXON_' || t.ncbi_tax_id + end as source_id, + o.abbrev as internal_abbrev, + o.public_abbrev, + o.orthomcl_abbrev, + o.family_name_for_files, + tn.name as organism_name, + o.genome_source, + o.strain_abbrev, + o.is_annotated_genome, + o.is_reference_strain, + o.is_family_representative, + o.name_for_filenames, + o.taxon_id as component_taxon_id, + gc.database_version, + gc.megabps as megabps, + gc.ncbi_tax_id as ncbi_tax_id, + gc.snpCount as snpCount, + gc.geneCount as geneCount, + gc.pseudoGeneCount as pseudoGeneCount, + gc.codingGeneCount as codingGeneCount, + gc.otherGeneCount as otherGeneCount, + gc.ChipChipGeneCount as ChipChipGeneCount, + gc.orthologCount as orthologCount, + gc.goCount as goCount, + gc.tfbsCount as tfbsCount, + gc.proteomicsCount as proteomicsCount, + gc.estCount as estCount, + gc.ecNumberCount as ecNumberCount, + cast(coalesce(dsc.Organellar_Has, 0) as NUMERIC(1)) as isOrganellar, + cast(coalesce(dsc.HTSIsolate_Has, 0) as NUMERIC(1)) as hasHTSIsolate, + cast(coalesce(dsc.Popset_Has, 0) as NUMERIC(1)) as hasPopset, + cast(coalesce(dsc.Epitope_Has, 0) as NUMERIC(1)) as hasEpitope, + cast(coalesce(dsc.Array_Has, 0) as NUMERIC(1)) as hasArray, + coalesce(oc.hasCentromere, 0) as hasCentromere, + coalesce(sc.contig_num, 0) as contigCount, + coalesce(sc.supercont_num, 0) as supercontigCount, + coalesce(sc.chrom_num, 0) as chromosomeCount, + coalesce(cc.communityCount, 0) as communityCount, + coalesce(psc.popsetCount, 0) as 
popsetCount, + coalesce(pc.geneArrayCount, 0) as arrayGeneCount, + coalesce(pc.rnaSeqCount, 0) as rnaSeqCount, + coalesce(pc.rtPCRCount, 0) as rtPCRCount, + coalesce(ta.avg_transcript_length, 0) as avg_transcript_length + FROM apidb.Organism o + INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id + INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id + LEFT JOIN DataSourceCount dsc ON o.taxon_id = dsc.taxon_id + LEFT JOIN OrganismCentromere oc ON o.taxon_id = oc.taxon_id + LEFT JOIN SequenceCount sc ON o.taxon_id = sc.taxon_id + LEFT JOIN CommunityCount cc ON o.taxon_id = cc.taxon_id + LEFT JOIN GeneCount gc ON o.taxon_id = gc.taxon_id + LEFT JOIN popsetCount psc ON o.taxon_id = psc.taxon_id + LEFT JOIN profileCount pc ON o.taxon_id = pc.taxon_id + LEFT JOIN ( + SELECT taxon_id, round(avg(length),1) as avg_transcript_length + FROM TranscriptAttributes + GROUP by taxon_id + ) ta ON o.taxon_id = ta.taxon_id + WHERE tn.name_class = 'scientific name' + ) oa, + TaxonSpecies ts, + sres.taxon t, + sres.taxonname tn2 + WHERE oa.component_taxon_id = ts.taxon_id + AND ts.species_taxon_id = t.taxon_id + AND ts.species_taxon_id = tn2.taxon_id + AND tn2.name_class = 'scientific name' + ]]> + + + + + + + + + + Stores, for each transcript, a string containing the gene-relative coordinates + of all its introns and UTRs. + + + + + + + + + + + + + + + + + + + + + Stores special webservice abbreviations which are not standard organism + names. Each record maps an organism name onto this abbreviation, as + well as the species name and project ID. Used by the model and as an + input in the creation of the OrganismAbbreviationBlast tuning table. + Propagated to portal instances. + + + + + + + + + + + + + Group species by higher level taxonomy. Each row associates a taxon of + interest with one of its ancestors in the taxon tree. Used in parameter + queries that have to know about the taxon tree. Propagated to portal + instances. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + Each record maps an organism to its BLAST abbreviation. Used by + BLAST-query parameters. Propagated to portal instances. + + + + + + + + + + + + + For each project, show which BLAST databases are available for which + species. Used in BLAST param queries. Propagated to portal instances. + + + + + + + + + + + + + + + Each row stores mass-spec. based expression evidence for one sample of + one experiment for one gene. Used for mass spec queries in the model, + GBrowse, and PBrowse, and also in the creation of the MSTranscriptSummary + tuning table. + + + + + + + + + + + + + + + + + + + + + + + + + + + + Stores summary information from annotated genomes to facilitate overview section of gene page + + + + + + + + + + + + + + + Mass-spec experiment results for a peptide. Used by the model, GBrowse, + and PBrowse. + + + + + + + + + + + + + + + + + + + + + + + + + Data from the Seattle Structural Genomics Center for Infectious Disease, + populated from their web service. Used in the gene record. + + + + + + + + Used by the model and GBrowse, as well as an input in the creation of + the tuning tables like MSModifiedPeptideSummary and MSPeptideSummary. + + + + + + + + Used by the model when writing profile data + + + + + + + + Associates an organism with the GBrowse and PBrowse tracks available + for it. Used by the gene record. + + + + + + + + + + Each row maps a dataset onto an ID for which the dataset contains data; + each dataset gets one such row. + Used in dataset record queries. + + + + + + + + + + + + + + + + + Citation info for proteomics datasets, used by GBrowse + + + + ' || sample || '

' as sample_i + FROM MSPeptideSummary mps, DatasetPresenter ds + -- consider using the tuning table ExternalDbDatasetPresenter instead of the LIKE below, if its performance is a problem + WHERE (ds.name = mps.external_database_name or mps.external_database_name like ds.dataset_name_pattern) + ) t + group by name, id + ) + SELECT name, + substr(description, 4000, 1) || ' Primary Contact Email: '|| coalesce(email, 'unavailable') + || ' PMID: ' || publications || '

Samples:

' + || sample_table || chr(10) || + ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation + FROM ( + SELECT ds.name as name, ds.summary as description, pubs.contact_email as email, + pubs.pmids as publications, samples.sample_table as sample_table + FROM DatasetPresenter ds, pubs, samples + WHERE ds.dataset_presenter_id = pubs.id + AND ds.dataset_presenter_id = samples.id + ) t + ]]> +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = commit_after THEN + COMMIT; + ctrows := 0; + END IF; + END LOOP; + commit; + END; + $$ LANGUAGE PLPGSQL; + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For all datasets, list all genes (source_id) of a gene_group where one of them (profile_graph_id) + has data for a profile_set. + + + + + + + + + + + + + + + for gene-page expression graphs + + + + + + + + + + + + + + + + + + + + + Data from STRING-DB.org, populated from their web service. + Used in the gene record. + + + + + + + + + annotation updates from Apollo + + + + + + + + + + + Stores a mapping between external databases, taxon IDs, and URLs + + + + + + + + + + + + + + + + + + + + + + + + + + + + Text from ApolloUpdate that can be used in site search to find genes + + + + + = au.mapping_start + AND ta.source_id = au.apolloTranscript + AND ga.strand_plus_minus = au.strand + AND ta.gene_source_id = ga.source_id + ]]> + + + + + + + + + + Text for PreferredProduct table on gene record page. 
+ + + + + + + + + + + + + + + + + + + + + all products for each gene + + + + + + + + + + + + + = 3 -- no product in apidb.GeneFeatureProduct + union + select ta.gene_source_id AS source_id, t.product, ta.project_id, + null as is_alternate, + string_agg(ta.source_id, ', ' order by ta.source_id) as transcript_ids, + null as reference, null as evidence_code, null as evidence_code_parameter, + null as assigned_by, 'dots.Transcript' as source + from TranscriptAttributes ta, dots.Transcript t + where ta.na_feature_id = t.na_feature_id + and t.product is not null + and ta.gene_source_id + not in (select source_id + from GeneProduct + where source_rule < 6) -- product only in dots.Transcript + group by ta.gene_source_id, ta.project_id, t.product + union + select gp.source_id as source_id, gp.product, ga.project_id, + null as is_alternate, + null as transcript_ids, null as reference, null as evidence_code, + null as evidence_code_parameter, null as assigned_by, + 'unspecified product' as source + from GeneProduct gp, GeneAttributes ga + where gp.source_rule = 7 + and gp.source_id = ga.source_id + order by is_alternate desc, transcript_ids + ]]> + + + + + + + +
diff --git a/Model/lib/xml/tuningManager/apiTuningManager.xml b/Model/lib/xml/tuningManager/apiTuningManager.xml index 2dd94f6212..8ea8b951bc 100644 --- a/Model/lib/xml/tuningManager/apiTuningManager.xml +++ b/Model/lib/xml/tuningManager/apiTuningManager.xml @@ -3,1425 +3,7009 @@ - - - - - + + Locations and Sequence of Transmembrane Domains (TMHMM) + + + + + + + + + + + + + - - - - Map each GO term that is assigned to at least one gene to a GoSubset term - that is either itself or an immediate ancestor. By "immediate ancestor" - we mean an ancestor such that there isn't an intermediate ancestor also - in the subset. (Note that there can be multiple links as long as none is - in the subset.) This is currently restricted to 'goslim_generic', solely - by the condition in the SUBSET_TERM subquery. + + Locations and Sequence of Signal Peptide Domains (SignalP) - - - + + + + + + + + CREATE TABLE &prefixSignalPeptideDomains&1 AS + SELECT + gf.source_id gene_source_id + , t.source_id transcript_source_id + , taf.na_feature_id + , spf.aa_feature_id + , spf.aa_sequence_id + , spf.parent_id + , aal.start_min + , aal.end_max + , spf.algorithm_name + , substr(s.sequence, aal.end_max::INTEGER, 1) peptide_sequence + FROM + dots.SignalPeptideFeature spf + , dots.AaLocation aal + , dots.TranslatedAaFeature taf + , dots.TranslatedAaSequence tas + , dots.GeneFeature gf + , dots.AaSequence s + , dots.Transcript t + WHERE + spf.aa_sequence_id = s.aa_sequence_id + AND aal.aa_feature_id = spf.aa_feature_id + AND t.na_feature_id = taf.na_feature_id + AND taf.aa_sequence_id = tas.aa_sequence_id + AND tas.aa_sequence_id = spf.aa_sequence_id + AND gf.na_feature_id = t.parent_id + AND (spf.signal_probability >= .5 + OR spf.signal_probability IS NULL + OR ((spf.means_score + spf.maxy_score) / 2) >= .5 + OR ( spf.maxy_conclusion + spf.maxc_conclusion + spf.maxs_conclusion + spf.means_conclusion ) >= 3 + ) + ORDER BY + spf.aa_sequence_id, spf.aa_feature_id + ]]> + ]]> - + + + + - - Each row maps 
a dataset onto an ID for which the dataset contains data; - each dataset gets one such row. - Used in dataset record queries. + + Taxon ranks for organisms - - - + + + - - Stores per-organism information. Used by the organism record, as well - as by project_id(), the function that maps an organism to a project. + + Attributes for Metabolic Pathways - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + p.source_id + , p.pathway_id + , p.name + , enz.total_enzyme_count + , cpd.total_compound_count + , p.url + , replace(replace(ed.name, 'Pathways_', ''), '_RSRC', '') as pathway_source + , ed.name as external_db_name + , edr.version as external_db_version + FROM + sres.pathway p + , sres.externalDatabase ed + , sres.externalDatabaseRelease edr + ,(SELECT + COUNT( *) AS total_compound_count + , pathway_id + FROM + sres.pathwayNode pn + , SRES.ontologyterm ot + WHERE + pn.pathway_node_type_id = ot.ontology_term_id + AND ot.name = 'molecular entity' + GROUP BY + pathway_id + ) cpd + ,(SELECT + COUNT( *) AS total_enzyme_count + , pathway_id + FROM + sres.pathwayNode pn + , SRES.ontologyterm ot + WHERE + pn.pathway_node_type_id = ot.ontology_term_id + AND ot.name = 'enzyme' + GROUP BY + pathway_id + ) enz + WHERE + ed.external_database_id = edr.external_database_id + AND edr.external_database_release_id = p.external_database_release_id + AND cpd.pathway_id = p.pathway_id + AND enz.pathway_id = p.pathway_id + AND source_id NOT IN('ec01100', 'ec01110', 'ec01120') + -- temporarily remove MPMP from release 46 + AND ed.name NOT LIKE '%MPMP%' + ]]> +
0 then 1 else 0 end as hasCentromere - FROM DOTS.MISCELLANEOUS f - , sres.ontologyTerm ot - , dots.nasequence s - WHERE ot.ontology_term_id = f.sequence_ontology_id - AND ot.name='centromere' - AND f.na_sequence_id = s.na_sequence_id - GROUP BY s.taxon_id + CREATE UNIQUE INDEX PathAttr_sourceId_pwaySrc&1 + ON &prefixPathwayAttributes&1 (source_id, pathway_source) + ]]> +
+ + + + synteny stats for each reference-taxon / comparison-taxon pair + + + + + + + + + + + + + + + + + + = 100 - AND s.is_best_alignment in (1) - AND s.percent_est_bases_aligned >= 20 - AND s.percent_identity >= 90 - AND e.best_alignment_count <= 1 - AND e.source_id = s.accession - GROUP by s.gene HAVING count(*) >= 1 - ) est ON ga.source_id = est.source_id - RIGHT OUTER JOIN ( - SELECT project_id, taxon_id, - max(database_version) as database_version, - CASE WHEN ncbi_tax_id > 9000000000 THEN NULL - ELSE ncbi_tax_id - END ncbi_tax_id, - to_char(sum(length)/1000000,'9999.99') as megabps - FROM GenomicSeqAttributes - WHERE is_top_level = 1 - GROUP BY project_ID, taxon_id, ncbi_tax_id - ) genomestat ON genomestat.taxon_id = ga.taxon_id - LEFT OUTER JOIN ( - SELECT count(distinct ga.source_id) as ct, ga.taxon_id - FROM GeneAttributes ga, SnpAttributes sf - WHERE sf.gene_source_id = ga.source_id - AND ga.is_deprecated = 0 - GROUP BY ga.taxon_id - ) snpCount ON ga.taxon_id = snpCount.taxon_id - GROUP BY genomestat.taxon_id, - genomestat.project_id, - genomestat.database_version, - genomestat.ncbi_tax_id, - genomestat.Megabps, - snpCount.ct + DO $$ + DECLARE + idlist RECORD; + BEGIN + FOR idlist IN ( SELECT DISTINCT organism FROM GeneAttributes ) + LOOP + INSERT INTO TranscriptPathway&1 + WITH transcript_ec AS ( + SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4, + regexp_count( ec.ec_number, '-') as wildcard_count + FROM sres.EnzymeClass ec + WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM dots.AaSequenceEnzymeClass) + ), + pathway_node_ec AS ( + SELECT distinct pn.pathway_id, pn.row_id as enzyme_class_id + FROM sres.PathwayNode pn, sres.ontologyterm ot + WHERE pn.pathway_node_type_id = ot.ontology_term_id + AND ot.name = 'enzyme' + AND pn.display_label != '-.-.-.-' + ), + pathway_ec AS ( + SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4, + 
regexp_count( ec.ec_number, '-') as wildcard_count + FROM sres.EnzymeClass ec + WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM pathway_node_ec) + GROUP BY ec.enzyme_class_id + ), + ec_match AS ( + SELECT tec.enzyme_class_id as transcript_enzyme_class_id, + pec.enzyme_class_id as pathway_enzyme_class_id, + tec.wildcard_count as wildcard_count_transcript, + pec.wildcard_count as wildcard_count_pathway, + tec.ec_number as ec_number_transcript, + pec.ec_number as ec_number_pathway + FROM transcript_ec tec, pathway_ec pec + WHERE (tec.ec_number_1 = pec.ec_number_1 or tec.ec_number_1 is null or pec.ec_number_1 is null) + AND (tec.ec_number_2 = pec.ec_number_2 or tec.ec_number_2 is null or pec.ec_number_2 is null) + AND (tec.ec_number_3 = pec.ec_number_3 or tec.ec_number_3 is null or pec.ec_number_3 is null) + AND (tec.ec_number_4 = pec.ec_number_4 or tec.ec_number_4 is null or pec.ec_number_4 is null) + ) + SELECT DISTINCT ga.source_id + , ga.gene_source_id + , ga.project_id + , pa.source_id as pathway_source_id + , pa.name as pathway_name + , ec_match.ec_number_transcript as ec_number_gene + , ec_match.wildcard_count_transcript as wildcard_count_gene + , ec_match.ec_number_pathway + , ec_match.wildcard_count_pathway + , CASE WHEN ec_match.ec_number_pathway = ec_match.ec_number_transcript + THEN 1 + ELSE 0 END as exact_match + , CASE WHEN ec_match.wildcard_count_pathway + ec_match.wildcard_count_transcript = 0 + THEN 1 + ELSE 0 END as complete_ec + , pa.pathway_id + , pa.pathway_source + , p.external_database_release_id + FROM PathwayAttributes pa + , sres.pathway p + , pathway_node_ec pec + , ec_match + , dots.AaSequenceEnzymeClass asec + , TranscriptAttributes ga + WHERE ga.organism = idlist.organism + AND pa.pathway_id = pec.pathway_id + AND p.pathway_id = pa.pathway_id + AND pec.enzyme_class_id = ec_match.pathway_enzyme_class_id + AND asec.enzyme_class_id = ec_match.transcript_enzyme_class_id + AND ga.aa_sequence_id = asec.aa_sequence_id + AND ( + 
(ga.orthomcl_name IS NULL AND asec.evidence_code != 'OrthoMCLDerived') + OR ga.orthomcl_name IS NOT NULL + ) + ; + commit; + END LOOP; + END; + $$ LANGUAGE PLPGSQL; ]]> 10000000 - -- then 'TMPTX_' || round(t.ncbi_tax_id / 10000000) || '_' || - -- mod(t.ncbi_tax_id, 10000000) -- e.g. "TMPTX_930_1" - -- then 'TMPTX_' || t.ncbi_tax_id -- all the many digits - then 'TMPTX_' || o.public_abbrev - else 'NCBITAXON_' || t.ncbi_tax_id - end as source_id, - o.abbrev as internal_abbrev, - o.public_abbrev, - o.orthomcl_abbrev, - o.family_name_for_files, - tn.name as organism_name, - o.genome_source, - o.strain_abbrev, - o.is_annotated_genome, - o.is_reference_strain, - o.is_family_representative, - o.name_for_filenames, - o.taxon_id as component_taxon_id, - gc.database_version, - gc.megabps as megabps, - gc.ncbi_tax_id as ncbi_tax_id, - gc.snpCount as snpCount, - gc.geneCount as geneCount, - gc.pseudoGeneCount as pseudoGeneCount, - gc.codingGeneCount as codingGeneCount, - gc.otherGeneCount as otherGeneCount, - gc.ChipChipGeneCount as ChipChipGeneCount, - gc.orthologCount as orthologCount, - gc.goCount as goCount, - gc.tfbsCount as tfbsCount, - gc.proteomicsCount as proteomicsCount, - gc.estCount as estCount, - gc.ecNumberCount as ecNumberCount, - cast(coalesce(dsc.Organellar_Has, 0) as NUMERIC(1)) as isOrganellar, - cast(coalesce(dsc.HTSIsolate_Has, 0) as NUMERIC(1)) as hasHTSIsolate, - cast(coalesce(dsc.Popset_Has, 0) as NUMERIC(1)) as hasPopset, - cast(coalesce(dsc.Epitope_Has, 0) as NUMERIC(1)) as hasEpitope, - cast(coalesce(dsc.Array_Has, 0) as NUMERIC(1)) as hasArray, - coalesce(oc.hasCentromere, 0) as hasCentromere, - coalesce(sc.contig_num, 0) as contigCount, - coalesce(sc.supercont_num, 0) as supercontigCount, - coalesce(sc.chrom_num, 0) as chromosomeCount, - coalesce(cc.communityCount, 0) as communityCount, - coalesce(psc.popsetCount, 0) as popsetCount, - coalesce(pc.geneArrayCount, 0) as arrayGeneCount, - coalesce(pc.rnaSeqCount, 0) as rnaSeqCount, - 
coalesce(pc.rtPCRCount, 0) as rtPCRCount, - coalesce(ta.avg_transcript_length, 0) as avg_transcript_length - FROM apidb.Organism o - INNER JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id - INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id - LEFT JOIN DataSourceCount dsc ON o.taxon_id = dsc.taxon_id - LEFT JOIN OrganismCentromere oc ON o.taxon_id = oc.taxon_id - LEFT JOIN SequenceCount sc ON o.taxon_id = sc.taxon_id - LEFT JOIN CommunityCount cc ON o.taxon_id = cc.taxon_id - LEFT JOIN GeneCount gc ON o.taxon_id = gc.taxon_id - LEFT JOIN popsetCount psc ON o.taxon_id = psc.taxon_id - LEFT JOIN profileCount pc ON o.taxon_id = pc.taxon_id - LEFT JOIN ( - SELECT taxon_id, round(avg(length),1) as avg_transcript_length - FROM TranscriptAttributes - GROUP by taxon_id - ) ta ON o.taxon_id = ta.taxon_id - WHERE tn.name_class = 'scientific name' - ) oa, - TaxonSpecies ts, - sres.taxon t, - sres.taxonname tn2 - WHERE oa.component_taxon_id = ts.taxon_id - AND ts.species_taxon_id = t.taxon_id - AND ts.species_taxon_id = tn2.taxon_id - AND tn2.name_class = 'scientific name' + create index TranscriptPath_ix&1 + on TranscriptPathway&1(gene_source_id, source_id, pathway_source_id, + pathway_name, pathway_id, ec_number_gene, wildcard_count_pathway, + ec_number_pathway, pathway_source) + ]]> - - - - Stores, for each transcript, a string containing the gene-relative coordinates - of all its introns and UTRs. - - - + + + + - + ]]> - - - - Stores special webservice abbreviations which are not standard organism - names. Each record maps an organism name onto this abbreviation, as - well as the species name and project ID. Used by the model and as an - input in the creation of the OrganismAbbreviationBlast tuning table. - Propagated to portal instances. - - - + + + + + + + + + - - - - - Group species by higher level taxonomy. Each row associates a taxon of - interest with one of its ancestors in the taxon tree. Used in parameter - queries that have to know about the taxon tree. 
Propagated to portal - instances. - - - - - + + the max and min depth of each ontology term in OntologyRelationship. Used by the GoTermSummary tuning table + + + + + + + + + GeneGoTerms: each row represents one GO term assignment to one gene, right from what was loaded. + + + + + + + + + + + - - Each record maps an organism to its BLAST abbreviation. Used by - BLAST-query parameters. Propagated to portal instances. + + A tuning table for the gene record GO term table - - - - + + + + + - - For each project, show which BLAST databases are available for which - species. Used in BLAST param queries. Propagated to portal instances. + + + Map each GO term that is assigned to at least one gene to a GoSubset term + that is either itself or an immediate ancestor. By "immediate ancestor" + we mean an ancestor such that there isn't an intermediate ancestor also + in the subset. (Note that there can be multiple links as long as none is + in the subset.) This is currently restricted to 'goslim_generic', solely + by the condition in the SUBSET_TERM subquery. - - - - - - - + + + + + + - - Each row stores mass-spec. based expression evidence for one sample of - one experiment for one gene. Used for mass spec queries in the model, - GBrowse, and PBrowse, and also in the creation of the MSTranscriptSummary - tuning table. + + + GoTermSummary: each row represents one GO term assignment to one gene. + (Typically, a gene has multiple such assignments.) This is used for + finding gene-GO mappings, such as for the gene-page GO table. - - - - + + + + + + + + - - - - - - - - - - - - Stores summary information from annotated genomes to facilitate overview section of gene page + + Each row maps a dataset onto an ID for which the dataset contains data; + each dataset gets one such row. + Used in dataset record queries. + - - - - + - - Mass-spec experiment results for a peptide. Used by the model, GBrowse, - and PBrowse. + + + + + + + + + + + + + + Stores per-organism information. 
Used by the organism record, as well + as by project_id(), the function that maps an organism to a project. + + - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0 then 1 else 0 end as hasCentromere + FROM DOTS.MISCELLANEOUS f + , sres.ontologyTerm ot + , dots.nasequence s + WHERE ot.ontology_term_id = f.sequence_ontology_id + AND ot.name='centromere' + AND f.na_sequence_id = s.na_sequence_id + GROUP BY s.taxon_id ]]> - - - - - - Data from the Seattle Structural Genomics Center for Infectious Disease, - populated from their web service. Used in the gene record. - - - - - - - - Used by the model and GBrowse, as well as an input in the creation of - the tuning tables like MSModifiedPeptideSummary and MSPeptideSummary. - - - - + + = g.start_min + AND g.na_sequence_id = seq.na_sequence_id + AND t.name = 'ExternalNASequence' + ) gene + ON gene.source_id = sim.source_id AND gene.sequence_id = sim.sequence_source_id) + GROUP BY sim.taxon_id + ]]> + + + = 100 + AND s.is_best_alignment in (1) + AND s.percent_est_bases_aligned >= 20 + AND s.percent_identity >= 90 + AND e.best_alignment_count <= 1 + AND e.source_id = s.accession + GROUP by s.gene HAVING count(*) >= 1 + ) est ON ga.source_id = est.source_id + RIGHT OUTER JOIN ( + SELECT project_id, taxon_id, + max(database_version) as database_version, + CASE WHEN ncbi_tax_id > 9000000000 THEN NULL + ELSE ncbi_tax_id + END ncbi_tax_id, + to_char(sum(length)/1000000,'9999.99') as megabps + FROM GenomicSeqAttributes + WHERE is_top_level = 1 + GROUP BY project_ID, taxon_id, ncbi_tax_id + ) genomestat ON genomestat.taxon_id = ga.taxon_id + LEFT OUTER JOIN ( + SELECT count(distinct ga.source_id) as ct, ga.taxon_id + FROM GeneAttributes ga, SnpAttributes sf + WHERE sf.gene_source_id = ga.source_id + AND ga.is_deprecated = 0 + GROUP BY ga.taxon_id + ) snpCount ON ga.taxon_id = snpCount.taxon_id + GROUP BY genomestat.taxon_id, + genomestat.project_id, + genomestat.database_version, + genomestat.ncbi_tax_id, + 
genomestat.Megabps, + snpCount.ct + ]]> + + + 10000000 + -- then 'TMPTX_' || round(t.ncbi_tax_id / 10000000) || '_' || + -- mod(t.ncbi_tax_id, 10000000) -- e.g. "TMPTX_930_1" + -- then 'TMPTX_' || t.ncbi_tax_id -- all the many digits + then 'TMPTX_' || o.public_abbrev + else 'NCBITAXON_' || t.ncbi_tax_id + end as source_id, + o.abbrev as internal_abbrev, + o.public_abbrev, + o.orthomcl_abbrev, + o.family_name_for_files, + tn.name as organism_name, + o.genome_source, + o.strain_abbrev, + o.is_annotated_genome, + o.is_reference_strain, + o.is_family_representative, + o.name_for_filenames, + o.taxon_id as component_taxon_id, + gc.database_version, + gc.megabps as megabps, + gc.ncbi_tax_id as ncbi_tax_id, + gc.snpCount as snpCount, + gc.geneCount as geneCount, + gc.pseudoGeneCount as pseudoGeneCount, + gc.codingGeneCount as codingGeneCount, + gc.otherGeneCount as otherGeneCount, + gc.ChipChipGeneCount as ChipChipGeneCount, + gc.orthologCount as orthologCount, + gc.goCount as goCount, + gc.tfbsCount as tfbsCount, + gc.proteomicsCount as proteomicsCount, + gc.estCount as estCount, + gc.ecNumberCount as ecNumberCount, + cast(coalesce(dsc.Organellar_Has, 0) as NUMERIC(1)) as isOrganellar, + cast(coalesce(dsc.HTSIsolate_Has, 0) as NUMERIC(1)) as hasHTSIsolate, + cast(coalesce(dsc.Popset_Has, 0) as NUMERIC(1)) as hasPopset, + cast(coalesce(dsc.Epitope_Has, 0) as NUMERIC(1)) as hasEpitope, + cast(coalesce(dsc.Array_Has, 0) as NUMERIC(1)) as hasArray, + coalesce(oc.hasCentromere, 0) as hasCentromere, + coalesce(sc.contig_num, 0) as contigCount, + coalesce(sc.supercont_num, 0) as supercontigCount, + coalesce(sc.chrom_num, 0) as chromosomeCount, + coalesce(cc.communityCount, 0) as communityCount, + coalesce(psc.popsetCount, 0) as popsetCount, + coalesce(pc.geneArrayCount, 0) as arrayGeneCount, + coalesce(pc.rnaSeqCount, 0) as rnaSeqCount, + coalesce(pc.rtPCRCount, 0) as rtPCRCount, + coalesce(ta.avg_transcript_length, 0) as avg_transcript_length + FROM apidb.Organism o + INNER 
JOIN sres.TaxonName tn ON tn.taxon_id = o.taxon_id + INNER JOIN sres.Taxon t ON t.taxon_id = tn.taxon_id + LEFT JOIN DataSourceCount dsc ON o.taxon_id = dsc.taxon_id + LEFT JOIN OrganismCentromere oc ON o.taxon_id = oc.taxon_id + LEFT JOIN SequenceCount sc ON o.taxon_id = sc.taxon_id + LEFT JOIN CommunityCount cc ON o.taxon_id = cc.taxon_id + LEFT JOIN GeneCount gc ON o.taxon_id = gc.taxon_id + LEFT JOIN popsetCount psc ON o.taxon_id = psc.taxon_id + LEFT JOIN profileCount pc ON o.taxon_id = pc.taxon_id + LEFT JOIN ( + SELECT taxon_id, round(avg(length),1) as avg_transcript_length + FROM TranscriptAttributes + GROUP by taxon_id + ) ta ON o.taxon_id = ta.taxon_id + WHERE tn.name_class = 'scientific name' + ) oa, + TaxonSpecies ts, + sres.taxon t, + sres.taxonname tn2 + WHERE oa.component_taxon_id = ts.taxon_id + AND ts.species_taxon_id = t.taxon_id + AND ts.species_taxon_id = tn2.taxon_id + AND tn2.name_class = 'scientific name' + ]]> + + + --> - - Used by the model when writing profile data - - + ]]> + - - - Associates an organism with the GBrowse and PBrowse tracks available - for it. Used by the gene record. + + Each record maps a gene to a PDB structure. Used by the model to find + genes that have a PDB structure and to find the PDB structures for a + given gene. - - - + + + + + + + + + GeneId maps any valid ID for a gene onto its official ID. These two quantities + are stored in the "id" and "gene" columns, respectively. The "unique_mapping" + column is set to 1 for IDs which map to only one gene. - - Each row maps a dataset onto an ID for which the dataset contains data; - each dataset gets one such row. - Used in dataset record queries. + Most of the CREATE TABLE statement is made up of the union of nine subqueries, + each of which looks in a different place for gene IDs. 
Each subquery populates + the "union_member" field with a different literal string, to make it easier to + understand which part (or parts) of the SQL is responsible for each ID-to-gene + mapping. - - - - - - + + + + + + + + + + + + + = pred_loc.start_min + AND pred_loc.is_reversed = gene_loc.is_reversed + AND pred_loc.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + UNION + SELECT ng.name AS id, gf.source_id AS gene, + 'NaGene' as union_member, ed.name as database_name /* dots.NaGene.name */ + FROM dots.GeneFeature gf, dots.NaFeatureNaGene nfng, dots.NaGene ng, + sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed + WHERE gf.na_feature_id = nfng.na_feature_id + AND ng.na_gene_id = nfng.na_gene_id + AND gf.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + UNION + SELECT source_id AS id, source_id AS gene, + 'same ID' as union_member, ed.name as database_name /* same ID (reflexive mapping) */ + FROM dots.GeneFeature gf, + sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed + WHERE gf.external_database_release_id = edr.external_database_release_id + AND edr.external_database_id = ed.external_database_id + UNION + SELECT n.name AS id, gf.source_id AS gene, + 'gene name' as union_member, d.name as database_name -- apidb.GeneFeatureName.name + from dots.genefeature gf, sres.ExternalDatabaseRelease r, sres.ExternalDatabase d, + ( select na_feature_id, name + from apidb.GeneFeatureName + where is_preferred = 1 + EXCEPT + -- suppress gene/name associations from the *DELETED_RSRC databases + select gfn.na_feature_id, gfn.name + from apidb.GeneFeatureName gfn, + sres.ExternalDatabase ed, sres.ExternalDatabaseRelease edr + where gfn.external_database_release_id = edr.external_database_release_id + and ed.external_database_id = edr.external_database_id + and ed.name like '%DELETED_RSRC' + ) n + where n.na_feature_id = 
gf.na_feature_id + and gf.external_database_release_id = r.external_database_release_id + and r.external_database_id = d.external_database_id + UNION + select dr.primary_identifier as id, + gf.source_id as gene, + 'AA feature DbRef primary ID' as union_member, + ed.name as database_name /* DbRef.primary_identifier mapped through DbRefAaFeature */ + from dots.GeneFeature gf, dots.Transcript t, dots.TranslatedAaFeature taf, + dots.DbRefAaFeature draf, sres.DbRef dr, + sres.ExternalDatabaseRelease edr, sres.ExternalDatabase ed + where gf.na_feature_id = t.parent_id + and t.na_feature_id = taf.na_feature_id + and taf.aa_feature_id = draf.aa_feature_id + and draf.db_ref_id = dr.db_ref_id + and dr.external_database_release_id = edr.external_database_release_id + and edr.external_database_id = ed.external_database_id + and ed.name + not in ('INTERPRO', 'PFAM', 'PIRSF', 'PRODOM', 'PROSITEPROFILES', + 'SMART', 'SUPERFAMILY', 'TIGRFAM', 'CDD','HAMAP','HMMPANTHER', + 'PRINTS','SCANPROSITE','SFLD') + ) mapping, + dots.GeneFeature gf, dots.NaSequence ns + WHERE mapping.gene = gf.source_id + AND gf.na_sequence_id = ns.na_sequence_id + AND (ns.taxon_id::varchar = '&filterValue' or length('&filterValue') = 0) + AND (gf.is_predicted != 1 OR gf.is_predicted is null) + GROUP BY mapping.id, mapping.gene ]]> - - - - Citation info for proteomics datasets, used by GBrowse - - ' || sample || '

' as sample_i - FROM MSPeptideSummary mps, DatasetPresenter ds - -- consider using the tuning table ExternalDbDatasetPresenter instead of the LIKE below, if its performance is a problem - WHERE (ds.name = mps.external_database_name or mps.external_database_name like ds.dataset_name_pattern) - ) t - group by name, id - ) - SELECT name, - substr(description, 4000, 1) || ' Primary Contact Email: '|| coalesce(email, 'unavailable') - || ' PMID: ' || publications || '

Samples:

' - || sample_table || chr(10) || - ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation - FROM ( - SELECT ds.name as name, ds.summary as description, pubs.contact_email as email, - pubs.pmids as publications, samples.sample_table as sample_table - FROM DatasetPresenter ds, pubs, samples - WHERE ds.dataset_presenter_id = pubs.id - AND ds.dataset_presenter_id = samples.id + INSERT INTO &prefixGeneId&1 + (id, gene, unique_mapping, union_member, database_name) + WITH munge + AS (SELECT DISTINCT + regexp_replace(id, '\.\d\d?$', '') as id, + gene, unique_mapping, union_member, database_name + FROM &prefixGeneId&1 + WHERE regexp_like(id, '(.*)\.\d\d?$') + ) + SELECT id, gene, 0 as unique_mapping, 'base ID' as union_member, database_name + FROM munge + WHERE id NOT IN (SELECT id FROM &prefixGeneId&1) + ]]> +
+ + -
- - + + --> - - - - - - - - - + ]]> + + + + + + - - - - - - + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
- - - - - - - - - - - - + + This table maps IDs for a sequence onto the official ID of the sequence. + It is analogous to GeneId, which does the same thing for genes. Used by + genomic-sequence record queries, by the sequence retrieval tool, and by + the BasketFixer, which updates users' baskets at release time to replace + old IDs with updated ones. + + + + - - - + + + + + + + + + + + + + Stores (transcript, sequence, distance from centromere) 3-tuples for transcripts + that lie on a sequence for which we have a centomere location. + + + + + + + + + + + + + + + The BFMV for proteins. Each protein gets a single record, which + stores all its attributes. Used mainly to create TranscriptAttributes + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + The BFMV for the gene record. Each gene gets a single record, which + stores all its attributes. Used widely, in the model and elsewhere, for + queries involving genes, as well as in the creation of more than a + dozen other tuning tables. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 10 ) + WHERE ta.project_id = 'TriTrypDB' + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Stores, for each transcript, a string containing the gene-relative coordinates + of all its introns and UTRs. + + + + + + + + + + + + + + + + A single product string per gene + + + + + + + + + + + + + + + + + + The BFMV for the gene record. Each gene gets a single record, which + stores all its attributes. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each record represents one SNP. Widely used in the model, as well as in + the creation of several other tuning tables, Includes only NGS SNPs. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each row represents one EST. 
Used widely in the model, and to make the + tuning tables BlastTypes, OrganismAbbreviationBlast, and OrganismAttributes. + + + + + + + + + + + + + + + + + + + + + + Each row represents a colocated EST alignment - gene pair. Used by the + model, by generateGeneMetrics, and in the creation of the + OrganismAttributes tuning table + + + + + + + + + + + + + = 0 + AND query_sequence.na_sequence_id = ba.query_na_sequence_id + ]]> + + + + + + + + + + + + + + + + + + Each row represents one cosmid or bac end feature; for use in JBrowse. + + + + + + + + + + + + + + + Each record captures info for a strain/protocol app node. Used in the model, including + gene and SNP queries, as well as the gene record. + + + + + + + + + + + + + + + + Each record captures info for a strain/protocol app node. Used in the model, including + gene and SNP queries, as well as the gene record. + + + + + + + + + + + + + + + + + + + + The BFMV for the WDK popset record. Widely used in the model for + queries related to popsets. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Used by the GenesByChipChip(Plasmo|Toxo) query, as well as by + generateGeneMetrics. Also an input to OrganismAttributes. 
+ + + + + + + 0 */ + CASE WHEN ta.is_reversed = 0 + THEN ta.start_min - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) + ELSE ta.end_max - (((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) + END > 0 + THEN + CASE + WHEN ta.is_reversed = 0 + THEN '-' + ELSE '+' + END + ELSE + CASE + WHEN ta.is_reversed = 1 + THEN '-' + ELSE '+' + END + END as direction, + sr.score1 as score + FROM TranscriptAttributes ta, + Results.segmentresult sr, + Study.StudyLink sl, + Study.Study s + WHERE sr.na_sequence_id = ta.na_sequence_id + AND s.study_id = sl.study_id + AND sl.protocol_app_node_id = sr.protocol_app_node_id + AND lower(s.name) like '%chip%peaks' + AND ( (ta.is_reversed = 0 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.start_min) <= 3000) + or (ta.is_reversed = 1 and abs((((sr.segment_end - sr.segment_start) / 2) + sr.segment_start) - ta.end_max) <= 3000) ) + ]]> + + + + + + + + + + + Used by gene queries, as well as by generateGeneMetrics. Also an input + to OrganismAttributes. + + + + + + 0 */ + CASE WHEN ga.is_reversed = 0 + THEN ga.start_min - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) + ELSE ga.end_max - (((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) + END > 0 + THEN + CASE + WHEN ga.is_reversed = 0 + THEN '-' + ELSE '+' + END + ELSE + CASE + WHEN ga.is_reversed = 1 + THEN '-' + ELSE '+' + END + END as direction, + aef.* + FROM dots.BindingSiteFeature aef, + apidb.FeatureLocation arrloc, + GeneAttributes ga + WHERE aef.na_feature_id = arrloc.na_feature_id + AND arrloc.na_sequence_id = ga.na_sequence_id + AND ( (ga.is_reversed = 0 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.start_min) <= 3000) + or (ga.is_reversed = 1 and abs((((arrloc.end_max - arrloc.start_min) / 2) + arrloc.start_min) - ga.end_max) <= 3000) ) + ]]> + + + + + + + + + + + + Each record maps a gene onto a subcellular location. Used by + GenesBySubcellularLocalization. 
+ + + + + + + + + + + + + + + + Like dots.SimilaritySpan, except that for sequences that are mapped by + SequencePiece into parts of other sequences, both locations are stored. + Used by GBrowse, and also in the creation of the Blastx tuning table. + + + + + + + + = sim.max_query_end + AND sim.query_id = contig.na_sequence_id + AND sp.virtual_na_sequence_id = scaffold.na_sequence_id + ]]> + + + + + + + + + + + + + SNP Chip only, such as Plasmo barcode, 3k_chp and hd_array + + Each record represents one SNP. Widely used in the model, as well as in + the creation of several other tuning tables + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + this otherwise-unneeded tuning table, which depends on SnpAttributesDoTS, + exists so that the view SnpChipAttributes can be created as a side-effect + + + + + + + + + + + + + + + + Each record stores a Blastp similarity of a gene. Used by the gene-page + Blastp table. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each record stores a colocated (gene, popset) 2-tuple. Used by the + gene page as well as the PopsetByOverlap query. + + + + + + sim.min_subject_start + AND sim.query_id = ia.na_sequence_id + GROUP BY ia.source_id, fl.feature_source_id + ]]> + + + + + + + + + + + + Each record maps a taxon_id of interest onto the taxon_id of that taxon's + taxon-tree ancestor whose rank is "species". Used by + gene queries, and as an input in the + creation of several tuning tables, including GeneAttributes + + + + + + + + + + + + + + + + + Each record stores a Blastx similarity. Used by GBrowse for the + match:WU_BLASTX track. + + + + + + + + + + + + + + + Each record stores the transcript sequence of one gene. Used by the + gene record and the sequence retrieval tool. Propagated to the portal. + + + + + + + + + + + + + + Each record stores the coding sequence of one gene. Used by the + gene record and the sequence retrieval tool. Propagated to the portal. 
+ + + + + + + + + + + + + + + Each record stores the coding sequence of one gene. Used by the + gene record and the sequence retrieval tool, as well as by + buildTrackOldAnnotationTT. Propagated to the portal. + + + + + + + + + + + + + + + Each record stores the nucleotide sequence for one genomic sequence + that is "official" (in the sense that it can be instantiated as a WDK + sequence record. Used by generatePathoLogicFile and the sequence + retrieval tool Propagated to portal instances. + + + + + + + + + + + + + + + + Each record stores the nucleotide sequence of an EST, for use by the + relevant attribute query in the WDK EST record. Propagated to portal + instances. + + + + + + + + + + + + + + + + Mapping table of experiment and sample names to junction protocol_app_node_id + + + + + + + + + + + + = 1 + GROUP BY protocol_app_node_id + ), part AS ( + SELECT + ij.junctions_pan_id, ij.avg_value, stats.multiplier + , max(ij.expression_pan_id) OVER w as max_exp_pan_id + , max(ij.sample_name) OVER w as max_sample_Name + , max(ij.exp_name) OVER w as max_exp_name + FROM ij, stats + WHERE ij.junctions_pan_id = stats.protocol_app_node_id + WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) + ) + SELECT DISTINCT * FROM ( + SELECT junctions_pan_id + , first_value(max_exp_pan_id) OVER w1 as exp_pan_id + , first_value(max_sample_name) OVER w1 as sample_name + , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands + , multiplier + FROM part + WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) + ) t + ORDER BY junctions_pan_id + ]]> + + + + + + + + + + + + + + + + + + + + + + + Table collects up single row / intronjunction (identified as all junctions with same start, end and strand). Statistics are generated including percentages of max intron score and ratios vs expression on an overall level. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 500000 THEN 500000 ELSE step_mult END as seq_step_mult + FROM ( + SELECT gs.na_sequence_id, gs.length, gs.taxon_id, 25000 * (1 + floor(gs.length/count(*))) as step_mult + FROM apidb.intronjunction ij, dots.nasequence gs + WHERE gs.na_sequence_id = ij.na_sequence_id + GROUP BY gs.na_sequence_id, gs.length, gs.taxon_id + ) t + ORDER BY taxon_id + ) + LOOP + iter_length := idlist.seq_step_mult; + i_first_pos := 1; + i_last_pos := i_first_pos + iter_length; + WHILE i_first_pos < idlist.length + LOOP + INSERT INTO GIJtmp + SELECT DISTINCT + junc.*, + CASE + WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed + THEN 1 + ELSE 0 + END as matches_gene_strand, + last_value(ga.source_id) over w1 as gene_source_id, + last_value(ga.na_feature_id) over w1 as gene_na_feature_id, + CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron + FROM ( + SELECT ij.na_sequence_id,seq.source_id as sequence_source_id,ij.segment_start,ij.segment_end, + sum(ij.unique_reads) as total_unique, round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm, + ij.is_reversed,seq.source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id + FROM apidb.intronjunction ij, namemappinggij je, dots.nasequence seq + WHERE ij.na_sequence_id = idlist.na_sequence_id + AND ij.segment_start between i_first_pos and i_last_pos + AND ij.na_sequence_id = seq.na_sequence_id + AND ij.unique_reads >= 1 + AND je.junctions_pan_id = ij.protocol_app_node_id + AND je.multiplier < 20 + GROUP BY ij.na_sequence_id,ij.segment_start,ij.segment_end, ij.is_reversed,seq.source_id + ) junc + LEFT JOIN GeneIdLocGIJ&1 ga ON + junc.na_sequence_id = ga.na_sequence_id + AND junc.segment_start >= ga.start_min + AND junc.segment_end <= ga.end_max + AND junc.is_reversed = ga.is_reversed + LEFT JOIN annotgij ag ON + junc.na_sequence_id = ag.na_sequence_id + AND 
junc.segment_start = ag.start_min + AND junc.segment_end = ag.end_max + AND junc.is_reversed = ag.is_reversed + WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron') + WINDOW w1 AS ( + PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type + ORDER BY ga.total_expression ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + ; + commit; + i_first_pos := i_last_pos + 1; + i_last_pos := i_first_pos + iter_length; + END LOOP; + END LOOP; + END; + $$ LANGUAGE PLPGSQL; + ]]> + + + + + + 0 THEN round((junc.total_isrpm / maxv.max_isrpm) * 100,2) ELSE null END as percent_max, + CASE WHEN maxv.gene_source_id is not null THEN 1 ELSE 0 END as contained, + CAST (null as numeric(10)) as taxon_id, + cast (null as numeric(10)) as upstream_gene_id, + cast (null as numeric) as upstream_distance, + cast (null as numeric(10)) as downstream_gene_id, + cast (null as numeric) as downstream_distance + FROM + gijtmp junc LEFT JOIN + ( + SELECT gene_source_id,max(total_unique) as max_unique, max(total_isrpm) as max_isrpm + FROM gijtmp + WHERE gene_source_id is not null + GROUP BY gene_source_id + ) maxv ON junc.gene_source_id = maxv.gene_source_id + ]]> + + + + + + + + + + + + + + + + + Stores statistics for annotated introns used for configuring JBrowse tracks based on organism. + + + + + + + + + + + + Stores maximum values per gene for each sample so percent max intron can be computed for sample table. + + + + + + + + + = j.segment_end + AND ga.is_reversed = j.is_reversed + AND j.protocol_app_node_id = mult.junctions_pan_id + GROUP BY j.protocol_app_node_id, ga.source_id + ); + commit; + END LOOP; + END; + $$ LANGUAGE PLPGSQL; + ]]> + + + + + + + + Each record stores the nucleotide sequence of one popset. Used in the + relevant attribute query of the WDK popset record, as well as by + PopsetClustalOmega. Propagated to portal instances. 
+ + + + + + + + + + + + + + Each record maps an organism name onto an abbreviation, getting the + pair either from apidb.Organism or (in the case of Tvag), hardwired + into the below SQL. This table will eventually be replaced by workflow. + + + + + + + + + + + Stores special webservice abbreviations which are not standard organism + names. Each record maps an organism name onto this abbreviation, as + well as the species name and project ID. Used by the model and as an + input in the creation of the OrganismAbbreviationBlast tuning table. + Propagated to portal instances. + + + + + + + + + + + Groups projects by higher level taxonomy. Used in the creation of the + OldOrganismTree tuning table. + + + + + + + + + + Group species by higher level taxonomy. Each row associates a taxon of + interest with one of its ancestors in the taxon tree. Used in parameter + queries that have to know about the taxon tree, as well as + apidb.project_id(), the function that maps an organism name to a + project. Propagated to portal instances. + + + + + + + + + + + + + + + + + + + Group species by higher level taxonomy. Each row associates a taxon of + interest with one of its ancestors in the taxon tree. Used in parameter + queries that have to know about the taxon tree. Propagated to portal + instances. + + + + + + + + + + + + + + + + + + + + + + + + + + + + Each record maps an organism to its BLAST abbreviation. Used by + BLAST-query parameters. Propagated to portal instances. + + + + + + + + + + + + + For each project, show which BLAST databases are available for which + species. Used in BLAST param queries. Propagated to portal instances. + + + + + + + + + + + + + + + + Properties table for ChEBI Compounds. + + + + + + + + + + + + The BFMV for the compound WDK record type. Used by the model for the + compound record and queries, as well as in the creation of the + PathwayCompounds tuning table. Propagated to portal instances. 
+ Note: children of ChEBI compounds are excluded, but data of these is gathered in the (parent) entries. + + + + + + + + + + + + + Alias table for Compounds. + Compound column is the source_id from CompoundAttributes (chEBI_ID). + ID can have this same ID, or mapping KEGG ID, or Name, or Synonym. + + + + + + + + + + + + + + + + Each record represents a 5-tuple of (reaction, compound, pathway, + enzyme, type). Used extensively in the model for pathway-related + queries, as well as by getImageMap.pl. + + + + + + + + + + + + + + + + + + + Aggregates reactions irrespective of pathway. Required to determine if BioCyc reactions are reversible. Used extensively in the model in conjunction with pathwaycompounds for pathway related queries + + + + + + + + + + ' || o.enzyme || '' ELSE o.enzyme END as expasy_html + FROM ( + SELECT i.* + , CASE WHEN i.enzyme like '%.%.%.%' and i.enzyme != '-.-.-.-' + THEN + 'http://enzyme.expasy.org/cgi-bin/enzyme/enzyme-search-ec?field1=' + || ec.ec_number_1 + || CASE ec.ec_number_2 WHEN null THEN null ELSE chr(38) || 'field2=' || ec.ec_number_2 END + || CASE ec.ec_number_3 WHEN null THEN null ELSE chr(38) || 'field3=' || ec.ec_number_3 END + || CASE ec.ec_number_4 WHEN null THEN null ELSE chr(38) || 'field4=' || ec.ec_number_4 END + ELSE reaction_url END as expasy_url + , ec.description as enzyme_description + FROM ( + SELECT + reaction_id + , reaction_source_id + , reaction_url + , ext_db_name + , ext_db_version + , enzyme + , substrates_html || ' ' || sign || ' ' || products_html as equation_html + , substrates_text || ' ' || sign || ' ' || products_text as equation_text + , case when sign = '<=>' then 1 else 0 end as is_reversible + , substrates_text + , products_text + FROM ( + SELECT + reaction_id + , reaction_source_id + , reaction_url + , ext_db_name + , ext_db_version + , enzyme + , (case when (string_agg (case when type_list like '%substrate%' then compound end, ',' order by compound)) = (string_agg (case when type_list like 
'%product%' then compound end, ',' order by compound)) or is_reversible = 1 then '<=>' else '=>' end) as sign + , string_agg(case when type like '%substrate%' then compound_url end, ' + ' order by compound_url) as substrates_html + , string_agg(case when type like '%substrate%' then compound end, ' + ' order by compound) as substrates_text + , string_agg(case when type like '%product%' then compound_url end, ' + ' order by compound_url) as products_html + , string_agg(case when type like '%product%' then compound end, ' + ' order by compound) as products_text + FROM ( + WITH rep AS ( + SELECT DISTINCT + pr.PATHWAY_REACTION_ID as reaction_id + , pr.SOURCE_ID as reaction_source_id + , pn.DISPLAY_LABEL as enzyme + , coalesce(ca.compound_name, pc.compound_source_id) as compound + , prel.is_reversible as is_reversible_og + , last_value(prel.is_reversible) OVER (partition by pr.pathway_reaction_id ORDER BY prel.is_reversible ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING ) as is_reversible + , first_value(pc.type) over (partition by pr.pathway_reaction_id, pr.SOURCE_ID, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE, coalesce(pc.chebi_accession, pc.compound_source_id), coalesce(ca.compound_name, pc.compound_source_id) ORDER BY pc.pathway_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as type + FROM + sres.pathway p + , apidb.pathwayreaction pr + , APIDB.PATHWAYREACTIONREL prr + , SRES.PATHWAYNODE pn + , SRES.PATHWAYRELATIONSHIP prel + , SRES.ONTOLOGYTERM ot + , PathwayCompounds pc + LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id + WHERE p.PATHWAY_ID = prr.PATHWAY_ID + AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID + AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID + AND prel.NODE_ID = pn.PATHWAY_NODE_ID + AND ot.name = 'enzyme' + AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID + AND pc.PATHWAY_ID = p.PATHWAY_ID + AND pc.REACTION_id = pr.PATHWAY_REACTION_ID + ) + SELECT DISTINCT + pr.PATHWAY_REACTION_ID as 
reaction_id + , pr.SOURCE_ID as reaction_source_id + , ed.NAME as ext_db_name + , edr.VERSION as ext_db_version + , cast(pn.DISPLAY_LABEL as varchar(20)) as enzyme + , min(rep.is_reversible) as is_reversible + , min(rep.type) as type + , string_agg (pc.type, ',' order by p.pathway_id) as type_list + , coalesce(ca.compound_name, pc.compound_source_id) as compound + , CASE + WHEN coalesce(pc.CHEBI_ACCESSION, pc.compound_source_id) LIKE 'CHEBI%' + THEN '' || coalesce(ca.compound_name, pc.compound_source_id) || '' + ELSE coalesce(pc.chebi_accession, pc.compound_source_id) + END as compound_url + , CASE (replace (replace (ed.name, 'Pathways_', ''), '_RSRC', '')) /* external reaction page, keyed by the pathway source database */ + WHEN 'KEGG' THEN 'https://www.genome.jp/dbget-bin/www_bget?rn:' || pr.source_id + WHEN 'MetaCyc' THEN 'https://metacyc.org/META/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id + WHEN 'TrypanoCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/TRYPANO/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id + WHEN 'LeishCyc' THEN 'http://vm-trypanocyc.toulouse.inra.fr/LEISH/new-image?type=REACTION' || chr(38) || 'object=' || pr.source_id + WHEN 'FungiCyc' THEN NULL + END as reaction_url + FROM + sres.pathway p + , apidb.pathwayreaction pr + , APIDB.PATHWAYREACTIONREL prr + , SRES.PATHWAYNODE pn + , SRES.PATHWAYRELATIONSHIP prel + , SRES.EXTERNALDATABASE ed + , SRES.EXTERNALDATABASERELEASE edr + , SRES.ONTOLOGYTERM ot + , rep + , PathwayCompounds pc + LEFT JOIN CompoundAttributes ca ON pc.chebi_accession = ca.source_id + WHERE p.PATHWAY_ID = prr.PATHWAY_ID + AND pr.PATHWAY_REACTION_ID = prr.PATHWAY_REACTION_ID + AND prr.PATHWAY_RELATIONSHIP_ID = prel.PATHWAY_RELATIONSHIP_ID + AND prel.NODE_ID = pn.PATHWAY_NODE_ID + AND ot.name = 'enzyme' + AND ot.ONTOLOGY_TERM_ID = pn.PATHWAY_NODE_TYPE_ID + AND pc.EXT_DB_NAME = ed.NAME + AND pc.EXT_DB_VERSION = edr.VERSION + AND ed.EXTERNAL_DATABASE_ID = edr.EXTERNAL_DATABASE_ID + AND pc.PATHWAY_ID = p.PATHWAY_ID + AND pc.REACTION_id =
pr.PATHWAY_REACTION_ID + AND rep.reaction_id = pr.pathway_reaction_id + AND rep.reaction_source_id = pr.source_id + AND rep.compound = coalesce(ca.compound_name, pc.compound_source_id) + AND rep.enzyme = pn.DISPLAY_LABEL + AND rep.is_reversible_og = prel.is_reversible + GROUP BY pr.pathway_reaction_id, pr.SOURCE_ID, ed.NAME, edr.VERSION, pn.DISPLAY_LABEL, prel.IS_REVERSIBLE + , coalesce(pc.chebi_accession, pc.compound_source_id) + , coalesce(ca.compound_name, pc.compound_source_id) + ) t1 + GROUP BY reaction_id, reaction_source_id, reaction_url, ext_db_name, ext_db_version, enzyme, is_reversible + ) t2 + ) i + LEFT OUTER JOIN sres.enzymeclass ec ON i.enzyme = ec.ec_number + ) o + ]]> + + + + + + + + + Nodes and edges for pathway maps + + + + + + + + + + + + + + + + + + + + + + + 1 + ) + SELECT aee.e_id, pn.* + FROM pn + , AllEnzymeEdges aee + WHERE aee.all_edges = pn.all_edges + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + Used by pathway table on gene pages + + + + + + + + + + + + + + + for use in GenesByReactionCompounds question + + + + + + + + + + Each row stores mass-spec. based expression evidence for one sample of + one experiment for one gene. Used for mass spec queries in the model, + GBrowse, and PBrowse, and also in the creation of the MSTranscriptSummary + tuning table. + + + + + + + + + + + + + + + + + + + + + + + + + + + + Stores summary information from annotated genomes to facilitate overview section of gene page + + + + + + + + + + + + + + Mass-spec experiment results for a transcript. Used in the model for queries + related to transcripts. + + + + + + + + + + + + + Mass-spec experiment results for a peptide. Used by the model, GBrowse, + and PBrowse. + + + + + + + + + + + + + + + + + + + + + + + + + Data from the Seattle Structural Genomics Center for Infectious Disease, + populated from their web service. Used in the gene record. 
+ + + + + + + + Used by the model and GBrowse, as well as an input in the creation of + the tuning tables like MSModifiedPeptideSummary and MSPeptideSummary. + + + + + + + + Used by the model when writing profile data + + + + + + + + Associates an organism with the GBrowse and PBrowse tracks available + for it. Used by the gene record. + + + + + + + + + Associates an organism with the GBrowse and PBrowse tracks available + for it. Used by the gene record. + + + + + + + + + Each row maps a dataset onto an ID for which the dataset contains data; + each dataset gets one such row. + Used in dataset record queries. + + + + + + + + + + + + + + + + + + + Citation info for proteomics datasets, used by GBrowse + + + + ' || sample || '

' as sample_i + FROM MSPeptideSummary mps, DatasetPresenter ds + -- consider using the tuning table ExternalDbDatasetPresenter instead of the LIKE below, if its performance is a problem + WHERE (ds.name = mps.external_database_name or mps.external_database_name like ds.dataset_name_pattern) + ) t + group by name, id + ) + SELECT name, + /* substr(string, start, count): keep the first 4000 characters of the description */ substr(description, 1, 4000) || ' Primary Contact Email: '|| coalesce(email, 'unavailable') + || ' PMID: ' || publications || '

Samples:

' + || sample_table || chr(10) || + ' Please note that subtrack labels will disappear if the selected subtracks number is over 15!' as citation + FROM ( + SELECT ds.name as name, ds.summary as description, pubs.contact_email as email, + pubs.pmids as publications, samples.sample_table as sample_table + FROM DatasetPresenter ds, pubs, samples + WHERE ds.dataset_presenter_id = pubs.id + AND ds.dataset_presenter_id = samples.id + ) t + ]]> +
+
+ + + + + + + + + + = 1.5 + ) t + GROUP BY gene_source_id, project_id, sequence_id, haplotype_block_name, + start_min, end_max, start_max, end_min, organism + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = commit_after THEN - COMMIT; - ctrows := 0; - END IF; - END LOOP; - commit; - END; - $$ LANGUAGE PLPGSQL; + LOOP + ctrows := ctrows + 1; + INSERT INTO Profile&1 + (DATASET_NAME, DATASET_TYPE, DATASET_SUBTYPE, PROFILE_TYPE, NODE_TYPE, SOURCE_ID, PROFILE_STUDY_ID, PROFILE_SET_NAME, + PROFILE_SET_SUFFIX, PROFILE_AS_STRING, MAX_VALUE, MIN_VALUE, MAX_TIMEPOINT, MIN_TIMEPOINT) + VALUES + (pf_rows.DATASET_NAME, pf_rows.DATASET_TYPE, pf_rows.DATASET_SUBTYPE, pf_rows.PROFILE_TYPE, pf_rows.NODE_TYPE, pf_rows.SOURCE_ID, pf_rows.PROFILE_STUDY_ID, pf_rows.PROFILE_SET_NAME, + pf_rows.PROFILE_SET_SUFFIX, pf_rows.PROFILE_AS_STRING, pf_rows.MAX_VALUE, pf_rows.MIN_VALUE, pf_rows.MAX_TIMEPOINT, pf_rows.MIN_TIMEPOINT); + IF ctrows >= commit_after THEN + COMMIT; + ctrows := 0; + END IF; + END LOOP; + commit; + END; + $$ LANGUAGE PLPGSQL; + ]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For all datasets, list all genes (source_id) of a gene_group where one of them (profile_graph_id) + has data for a profile_set. + + + + + + + + + + + + + + + for gene-page expression graphs + + + + + + + + + + + + + + + + + + + + + For each distinct organism in GeneAttributes, all ancestors in the taxon tree. For the gene page. + + + + + + + + + + + + + + + + Materialization of GeneTables.GeneModelDump. + + + + + + + + + + + + + + + + + + + Distinct filter_name for gene query summary. For the initial version, + at least, it's (GeneAttriutes.species UNION GeneAttributes.organism). + Note that the UNION implies set bahavior and therefore distinctness. 
+ + + + - ]]> + + + + Info from dots.ExternalAaSequence records for BLAT alignments + + + + + + + + + Materialization of the orthology transform. Also useful for GeneTables.Orthologs. + + + + + + + create UNLOGGED table SyntenicPairs as + select distinct ga.na_feature_id, sg.syn_na_feature_id + from apidb.SyntenicGene sg, GeneAttributes ga + where sg.na_sequence_id = ga.na_sequence_id + and sg.end_max >= ga.start_min + and sg.start_min <= ga.end_max + ]]> - + + + + + + + + - + ]]> +
+ + + For each RNA-Seq Dataset, compute the top 500 ratios of max/min gene expression. + + + + + + + - - - - - - - + + + + + + + for GeneTables.RodMalPhenotypeTable + + + + + + + + results.source_id + OR knockdown.source_id is null + ORDER BY results.source_id, results.rmgmid ]]> - - For all datasets, list all genes (source_id) of a gene_group where one of them (profile_graph_id) - has data for a profile_set. - - - - + + Chromosome data for CNV queries + + + - + - + ]]> + + + - - - for gene-page expression graphs - - - - - - - + + + Gene Data for CNV queries + + + - + CREATE TABLE GeneCopyNumbers&1 AS + SELECT DISTINCT ta.project_id + , ta.source_id + , ta.gene_source_id + , REGEXP_REPLACE(pan.name, '_[A-Za-z0-9]+ (.+)$', '') AS strain + , gcn.haploid_number AS raw_estimate + , gcn.ref_copy_number AS ref_cn + , CASE WHEN (gcn.haploid_number < 0.01) THEN 0 + WHEN (0.01 < gcn.haploid_number AND gcn.haploid_number < 1.85) THEN 1 + ELSE round(gcn.haploid_number) END AS haploid_number + , ta.chromosome + , ta.na_sequence_id + , io.input_pan_id + , io.output_pan_id + FROM apidb.genecopynumber gcn + , study.protocolappnode pan + , TranscriptAttributes ta + , PANIo io + WHERE gcn.protocol_app_node_id = pan.protocol_app_node_id + AND gcn.na_feature_id = ta.gene_na_feature_id + AND gcn.protocol_app_node_id = io.output_pan_id + AND (ta.gene_type = 'protein coding' or ta.gene_type = 'protein coding gene') + ]]> + ]]> @@ -2586,6 +8634,90 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id) + + for TranscriptAttributes.InterproColumns + + + + + + + + + + + + + + + + annotation updates from Apollo @@ -2595,6 +8727,37 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id) + + semicolon-delimited list of formatted genomic locations for each gene + + + + + + + + + @@ -2665,18 +8828,13 @@ create unique index Organism_sourceId_idx&1 ON OrganismAttributes&1 (source_id) - - - + + + + + + One phylogenetic-profile string per 
ortholog group + + + + + + + + + + @@ -2959,6 +9145,252 @@ sub readClob { + + + Links AlphaFold entries to gene ids where Uniprot ids are directly assigned + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Stores intron support for introns + + + + + + + + = CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END + AND (gij.contained = 0 or gij.percent_max >= 2 /*stats.perc0005_annot_percent_max*/) + ) t + GROUP BY gene_source_id, ontology_term, intron_count + ) t + WHERE string_value = 'All' + + UNION + + SELECT gene_source_id + , ontology_term + , case when count(*) = intron_count THEN 'All-high' + when count(*) = 0 THEN 'None' + else 'Any-high' end as string_value + FROM ( + SELECT gij.gene_source_id + , 'intron_junction' as ontology_term + , intronCount.intron_count + FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats + , (SELECT count (*) as intron_count, source_id FROM apidbtuning.genemodeldump WHERE type = 'Intron' GROUP BY source_id) intronCount + WHERE gij.gene_source_id = intronCount.source_id + AND gij.na_sequence_id = stats.na_sequence_id + AND gij.annotated_intron = 'Yes' + AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2 + AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END + AND (gij.contained = 0 or gij.percent_max >= 2) + ) t2 + GROUP BY gene_source_id, ontology_term, intron_count + + UNION + + SELECT gene_source_id + , ontology_term + , CASE WHEN count(*) = intron_count THEN 'All-low' + WHEN count(*) = 0 THEN 'None' + ELSE 'Any-low' END as string_value + FROM ( + SELECT gij.gene_source_id + , 'intron_junction' as ontology_term + , intronCount.intron_count + FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats + , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount + WHERE 
gij.gene_source_id = intronCount.source_id + AND gij.na_sequence_id = stats.na_sequence_id + AND gij.annotated_intron = 'Yes' + AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4 + AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END + AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max) + AND gij.intron_feature_id not in ( + SELECT gij.intron_feature_id + FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats + WHERE gij.na_sequence_id = stats.na_sequence_id + AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2 + AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END + AND (gij.contained = 0 or gij.percent_max >= 2) + ) + ) t3 + GROUP BY gene_source_id, ontology_term, intron_count + + UNION + + SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-low') as string_value + FROM ( + SELECT gene_source_id + , ontology_term + , case when count(*) = intron_count THEN 'All' + when count(*) = 0 THEN 'None' + else 'Any' end as string_value + FROM ( + SELECT gij.gene_source_id + , 'intron_junction' as ontology_term + , intronCount.intron_count + FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats + , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount + WHERE gij.gene_source_id = intronCount.source_id + AND gij.na_sequence_id = stats.na_sequence_id + AND gij.annotated_intron = 'Yes' + AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4 + AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END + AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max) + AND gij.intron_feature_id not in ( + SELECT gij.intron_feature_id + FROM ApidbTuning.GeneIntronJunction gij, 
ApidbTuning.GeneIntJuncStats stats + WHERE gij.na_sequence_id = stats.na_sequence_id + AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2 + AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END + AND (gij.contained = 0 or gij.percent_max >= 2) + ) + ) t + GROUP BY gene_source_id, ontology_term, intron_count + ) t4 + WHERE string_value = 'All' + ) t +]]> + + + all products for each gene From aa89ddb6547c7c8d168e87e34fdc6bfe7c2374d4 Mon Sep 17 00:00:00 2001 From: Richard Demko Date: Mon, 9 Jun 2025 12:07:33 -0400 Subject: [PATCH 106/112] Resolving syntax error --- .../lib/psql/webready/comparative/GeneOrthologGroup.psql | 8 +++----- .../webready/comparative/TranscriptOrthologGroup.psql | 8 +++----- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql index 4c30361e4d..404c23f4dc 100644 --- a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql +++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql @@ -1,10 +1,8 @@ create table :SCHEMA.GeneOrthologGroup as SELECT pa.gene_source_id AS gene_id, - ogas.group_id, + ogas.group_id FROM webready.proteinattributes pa, apidb.orthologgroupaasequence ogas - WHERE pa.aa_sequence_id = ogas.aa_sequence_id -; + WHERE pa.aa_sequence_id = ogas.aa_sequence_id; alter table :SCHEMA.GeneOrthologGroup - add constraint GeneOrthologGroup_pk primary key (gene_id) -; + add constraint GeneOrthologGroup_pk primary key (gene_id); diff --git a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql index 5362c8465e..d6fd747109 100644 --- a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql +++ b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql @@ -1,10 +1,8 @@ create table :SCHEMA.TranscriptOrthologGroup as SELECT 
ta.source_id AS source_id, - ogas.group_id, + ogas.group_id FROM webready.transcriptattributes ta, apidb.orthologgroupaasequence ogas - WHERE ta.aa_sequence_id = ogas.aa_sequence_id -; + WHERE ta.aa_sequence_id = ogas.aa_sequence_id; alter table :SCHEMA.TranscriptOrthologGroup - add constraint TranscriptOrthologGroup_pk primary key (source_id) -; + add constraint TranscriptOrthologGroup_pk primary key (source_id); From 758fa2d2bae166c388a89a4ca49b251158e35cc5 Mon Sep 17 00:00:00 2001 From: Richard Demko Date: Mon, 9 Jun 2025 12:20:42 -0400 Subject: [PATCH 107/112] Resolving duplicate row issue --- Model/lib/psql/webready/comparative/GeneOrthologGroup.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql index 404c23f4dc..a02ec0a2f2 100644 --- a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql +++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql @@ -1,5 +1,5 @@ create table :SCHEMA.GeneOrthologGroup as - SELECT pa.gene_source_id AS gene_id, + SELECT distinct(pa.gene_source_id) AS gene_id, ogas.group_id FROM webready.proteinattributes pa, apidb.orthologgroupaasequence ogas From 4b24d0649417f22d4c8e52bf5c37c0778ce7bba4 Mon Sep 17 00:00:00 2001 From: Richard Demko Date: Mon, 9 Jun 2025 12:57:16 -0400 Subject: [PATCH 108/112] Resolving primary key issue --- Model/lib/psql/webready/comparative/GeneOrthologGroup.psql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql index a02ec0a2f2..d2f229f099 100644 --- a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql +++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql @@ -1,8 +1,9 @@ - create table :SCHEMA.GeneOrthologGroup as - SELECT distinct(pa.gene_source_id) AS gene_id, + create table :SCHEMA.GeneOrthologGroup 
as + SELECT DISTINCT pa.gene_source_id AS gene_id, ogas.group_id FROM webready.proteinattributes pa, apidb.orthologgroupaasequence ogas WHERE pa.aa_sequence_id = ogas.aa_sequence_id; alter table :SCHEMA.GeneOrthologGroup - add constraint GeneOrthologGroup_pk primary key (gene_id); + ADD CONSTRAINT GeneOrthologGroup_pk PRIMARY KEY (gene_id, group_id); + From d25cdba6584bffb723d8374cf9f835d3ef4fe42e Mon Sep 17 00:00:00 2001 From: Richard Demko Date: Mon, 9 Jun 2025 14:43:37 -0400 Subject: [PATCH 109/112] Adding distinct full_id to proteinSequenceGroup --- Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql b/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql index bfdd39fcc1..de2df5a828 100644 --- a/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql +++ b/Model/lib/psql/webready/comparative/ProteinSequenceGroup.psql @@ -1,6 +1,6 @@ create table :SCHEMA.ProteinSequenceGroup as SELECT - aas.source_id AS full_id, + distinct(aas.source_id) AS full_id, aas.source_id, aas.aa_sequence_id, length(aas.sequence) as length, From e20cbdb5a61bca2102f101594356b55e981f2c9e Mon Sep 17 00:00:00 2001 From: Sufen Hu Date: Mon, 16 Jun 2025 11:56:18 -0400 Subject: [PATCH 110/112] Avoid hardcoding webready; use :SCHEMA for better flexibility --- Model/lib/psql/webready/comparative/GeneOrthologGroup.psql | 2 +- .../lib/psql/webready/comparative/TranscriptOrthologGroup.psql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql index d2f229f099..1f895fde29 100644 --- a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql +++ b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql @@ -1,7 +1,7 @@ create table :SCHEMA.GeneOrthologGroup as SELECT DISTINCT pa.gene_source_id AS gene_id, 
ogas.group_id - FROM webready.proteinattributes pa, + FROM :SCHEMA.proteinattributes pa, apidb.orthologgroupaasequence ogas WHERE pa.aa_sequence_id = ogas.aa_sequence_id; alter table :SCHEMA.GeneOrthologGroup diff --git a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql index d6fd747109..fcb9f3a012 100644 --- a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql +++ b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql @@ -1,7 +1,7 @@ create table :SCHEMA.TranscriptOrthologGroup as SELECT ta.source_id AS source_id, ogas.group_id - FROM webready.transcriptattributes ta, + FROM :SCHEMA.transcriptattributes ta, apidb.orthologgroupaasequence ogas WHERE ta.aa_sequence_id = ogas.aa_sequence_id; alter table :SCHEMA.TranscriptOrthologGroup From 6af3a1bf5e7b6134c02bcc59517b25b0cf12eb82 Mon Sep 17 00:00:00 2001 From: steve-fischer-200 Date: Wed, 25 Jun 2025 13:51:15 -0400 Subject: [PATCH 111/112] use PG remote schemas --- .../tuningManager/apiTuningManager-pruned.xml | 53 +++++++++---------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml b/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml index 2dd94f6212..587655a7df 100644 --- a/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml +++ b/Model/lib/xml/tuningManager/apiTuningManager-pruned.xml @@ -303,18 +303,14 @@ @@ -2825,23 +2821,22 @@ sub readClob { , au.creationdate as creation_date , au.apolloevidencecode as evidence_code , au.apolloevidencecodeassignment as evidence_code_parameter - , 'TODO add owner details' as apollo_owner_details - --, (select firstname.value || ' ' || lastname.value || ', ' || address.value from - -- (select uap.value - -- from useraccounts.accounts@acctdbn.profile uaa, useraccounts.account_properties@acctdbn.profile uap - -- where uaa.user_id = uap.user_id - -- and uaa.stable_id = 
REGEXP_SUBSTR(apolloowner,'[^,]+',1,1) - -- and uap.key = 'first_name') firstname, - -- (select uap.value - -- from useraccounts.accounts@acctdbn.profile uaa, useraccounts.account_properties@acctdbn.profile uap - -- where uaa.user_id = uap.user_id - -- and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1) - -- and uap.key = 'last_name') lastname, - -- (select uap.value - -- from useraccounts.accounts@acctdbn.profile uaa, useraccounts.account_properties@acctdbn.profile uap - -- where uaa.user_id = uap.user_id - -- and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1) - -- and uap.key = 'organization') address) as apollo_owner_details + , (select firstname.value || ' ' || lastname.value || ', ' || address.value from + (select uap.value + from &remote_useraccounts_schema.accounts.profile uaa, &remote_useraccounts_schema.account_properties.profile uap + where uaa.user_id = uap.user_id + and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1) + and uap.key = 'first_name') firstname, + (select uap.value + from &remote_useraccounts_schema.accounts.profile uaa, &remote_useraccounts_schema.account_properties.profile uap + where uaa.user_id = uap.user_id + and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1) + and uap.key = 'last_name') lastname, + (select uap.value + from &remote_useraccounts_schema.accounts.profile uaa, &remote_useraccounts_schema.account_properties.profile uap + where uaa.user_id = uap.user_id + and uaa.stable_id = REGEXP_SUBSTR(apolloowner,'[^,]+',1,1) + and uap.key = 'organization') address) as apollo_owner_details FROM ApolloUpdate au, GeneAttributes ga, TranscriptAttributes ta WHERE au.type = 'gene' AND (au.attr like '%gene_product=%' From 3d014bcedb7743bbc072d3971b5c3d43ebead428 Mon Sep 17 00:00:00 2001 From: John Brestelli <39096257+jbrestel@users.noreply.github.com> Date: Thu, 17 Jul 2025 15:04:47 -0400 Subject: [PATCH 112/112] JB KC Webtables (#86) * remove unused filter * add GeneProduct table * only delete for this org * add 
splicesitetranscript * First pass at PathwayEC global table * First pass at transcript universe table * Add some pathway details to PathwayEc * Add partitioning columns and types, remove old code * Remove old tuning manager code, create empty TranscriptPathway table * Load into TranscriptEC and TranscriptPathway tables * File renamed * Use apidb.organism for looping * Use a different delimiter internally in PLPGSQL blocks * Fix SQL error * move genemaxintrongij to genemaxjunction * subset of Junction Tables * move GeneIntJuncStats to GeneJunctionStats * fix up genejunctionstats * rename genejunction stats as this table is not about genes * fix name * Add missing table * Add missing alias * Fix issue with EC number positions and add timestamp * PSQL for undoing TranscriptPathway * add few indexes for transcriptpathway tables * minor changes to hit partitions * wip; moved some comparative things around * remove GroupPhylogeneticProfile as it is now circular. other small changes * add custom undo script for org specific genomics ortholog tables * refactor intronsupportlevel * new name for junction stats table * Remove PathwayNodeGene SQLs, we don't need this table * Fix garbled comments * Load into existing partitioned table instead of creating * Add SQL to create empty partitioned table and to truncate on undo * low conf IntronJunction Support level should be inclusive --------- Co-authored-by: Kathryn Crouch --- .../comparative/GeneOrthologGroup.psql | 9 - .../comparative/GroupPhylogeneticProfile.psql | 14 - .../GroupPhylogeneticProfile_ix.psql | 3 - .../comparative/LoadOrthologTables.psql | 29 ++ .../comparative/LoadPathwaysGeneTable.psql | 58 +++ .../comparative/LoadTranscriptPathway.psql | 174 ++++++++ .../webready/comparative/PhyleticPattern.psql | 2 +- ...up_ix.sql => ProteinSequenceGroup_ix.psql} | 0 .../comparative/TranscriptOrthologGroup.psql | 8 - .../comparative/Undo_LoadOrthologTables.psql | 4 + .../Undo_LoadPathwaysGeneTable.psql | 3 + 
.../Undo_LoadTranscriptPathway.psql | 5 + Model/lib/psql/webready/global/PathwayEC.psql | 31 ++ .../psql/webready/global/PathwayEC_ix.psql | 6 + .../webready/orgSpecific/GeneAttributes.psql | 7 +- .../orgSpecific/GeneIntJuncStats.psql | 25 -- .../orgSpecific/GeneIntJuncStats_ix.psql | 3 - .../orgSpecific/GeneIntronJunction.psql | 403 ++++++++---------- .../orgSpecific/GeneIntronJunction_ix.psql | 21 +- .../orgSpecific/GeneMaxIntronGIJ.psql | 47 -- .../orgSpecific/GeneMaxIntronGIJ_ix.psql | 3 - .../webready/orgSpecific/GeneMaxJunction.psql | 69 +++ .../orgSpecific/GeneMaxJunction_ix.psql | 3 + .../orgSpecific/GeneOrthologGroup.psql | 21 + .../orgSpecific/GeneOrthologGroup_ix.psql | 2 + .../webready/orgSpecific/GeneProduct.psql | 130 ++++++ .../webready/orgSpecific/GeneProduct_ix.psql | 2 + .../orgSpecific/GenomicSeqAttributes.psql | 2 +- .../orgSpecific/GenomicSeqJunctionStats.psql | 32 ++ .../GenomicSeqJunctionStats_ix.psql | 2 + .../orgSpecific/IntronSupportLevel.psql | 168 +++----- .../orgSpecific/JunctionGeneLocation.psql | 23 + .../orgSpecific/JunctionGeneLocation_id.psql | 5 + .../JunctionToCoverageProfileMapping.psql | 143 +++++++ .../JunctionToCoverageProfileMapping_ix.psql | 2 + .../webready/orgSpecific/PathwayNodeGene.psql | 13 - .../orgSpecific/PathwaysGeneTable.psql | 66 ++- .../orgSpecific/SpliceSiteTranscript.psql | 33 ++ ...e_ix.psql => SpliceSiteTranscript_ix.psql} | 0 .../orgSpecific/TranscriptAttributes.psql | 4 +- .../webready/orgSpecific/TranscriptEC.psql | 35 ++ .../webready/orgSpecific/TranscriptEC_ix.psql | 5 + .../orgSpecific/TranscriptOrthologGroup.psql | 23 + .../TranscriptOrthologGroup_ix.psql | 2 + .../orgSpecific/TranscriptPathway.psql | 115 ++--- .../psql/webready/unknown/NameMappingGIJ.psql | 118 ----- .../webready/unknown/NameMappingGIJ_ix.psql | 3 - 47 files changed, 1160 insertions(+), 716 deletions(-) delete mode 100644 Model/lib/psql/webready/comparative/GeneOrthologGroup.psql delete mode 100644 
Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql delete mode 100644 Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql create mode 100644 Model/lib/psql/webready/comparative/LoadOrthologTables.psql create mode 100644 Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql create mode 100644 Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql rename Model/lib/psql/webready/comparative/{ProteinSequenceGroup_ix.sql => ProteinSequenceGroup_ix.psql} (100%) delete mode 100644 Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql create mode 100644 Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql create mode 100644 Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql create mode 100644 Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql create mode 100644 Model/lib/psql/webready/global/PathwayEC.psql create mode 100644 Model/lib/psql/webready/global/PathwayEC_ix.psql delete mode 100644 Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql delete mode 100644 Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql delete mode 100644 Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql delete mode 100644 Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql create mode 100644 Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql create mode 100644 Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql create mode 100644 Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql create mode 100644 Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql create mode 100644 Model/lib/psql/webready/orgSpecific/GeneProduct.psql create mode 100644 Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql create mode 100644 Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql create mode 100644 Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql create mode 100644 
Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql create mode 100644 Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql create mode 100644 Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql create mode 100644 Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql delete mode 100644 Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql create mode 100644 Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql rename Model/lib/psql/webready/orgSpecific/{PathwayNodeGene_ix.psql => SpliceSiteTranscript_ix.psql} (100%) create mode 100644 Model/lib/psql/webready/orgSpecific/TranscriptEC.psql create mode 100644 Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql create mode 100644 Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql create mode 100644 Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql delete mode 100644 Model/lib/psql/webready/unknown/NameMappingGIJ.psql delete mode 100644 Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql diff --git a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql b/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql deleted file mode 100644 index 1f895fde29..0000000000 --- a/Model/lib/psql/webready/comparative/GeneOrthologGroup.psql +++ /dev/null @@ -1,9 +0,0 @@ - create table :SCHEMA.GeneOrthologGroup as - SELECT DISTINCT pa.gene_source_id AS gene_id, - ogas.group_id - FROM :SCHEMA.proteinattributes pa, - apidb.orthologgroupaasequence ogas - WHERE pa.aa_sequence_id = ogas.aa_sequence_id; - alter table :SCHEMA.GeneOrthologGroup - ADD CONSTRAINT GeneOrthologGroup_pk PRIMARY KEY (gene_id, group_id); - diff --git a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql deleted file mode 100644 index dcbb9370d4..0000000000 --- a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile.psql +++ /dev/null @@ -1,14 +0,0 @@ - - 
CREATE table :SCHEMA.GroupPhylogeneticProfile as - SELECT rep.orthomcl_name, pp.profile_string - FROM apidb.PhylogeneticProfile pp, - (SELECT orthomcl_name, max(source_id) as source_id - FROM :SCHEMA.GeneAttributes - GROUP BY orthomcl_name) rep - WHERE rep.source_id = pp.source_id - - ; - - - - diff --git a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql b/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql deleted file mode 100644 index 1753b0f2db..0000000000 --- a/Model/lib/psql/webready/comparative/GroupPhylogeneticProfile_ix.psql +++ /dev/null @@ -1,3 +0,0 @@ - create index group_pp_ix - on :SCHEMA.GroupPhylogeneticProfile (orthomcl_name) - ; diff --git a/Model/lib/psql/webready/comparative/LoadOrthologTables.psql b/Model/lib/psql/webready/comparative/LoadOrthologTables.psql new file mode 100644 index 0000000000..ece00e833f --- /dev/null +++ b/Model/lib/psql/webready/comparative/LoadOrthologTables.psql @@ -0,0 +1,29 @@ +Truncate table :SCHEMA.GeneOrthologGroup; +Truncate table :SCHEMA.TranscriptOrthologGroup; + +insert into :SCHEMA.GeneOrthologGroup (gene_id, group_id, project_id, org_abbrev, modification_date) +SELECT ga.gene_source_id AS gene_id + , ogas.group_id + , ga.project_id + , ga.org_abbrev + , current_timestamp as modification_date + FROM :SCHEMA.geneattributes ga, + apidb.orthologgroupaasequence ogas + WHERE ga.aa_sequence_id = ogas.aa_sequence_id; + + + +insert into :SCHEMA.TranscriptOrthologGroup (source_id, gene_id, group_id, project_id, org_abbrev, modification_date) +SELECT ta.source_id AS source_id + , ta.gene_source_id as gene_id + , ogas.group_id + , ta.project_id + , ta.org_abbrev + , current_timestamp as modification_date + FROM :SCHEMA.transcriptattributes ta + , apidb.orthologgroupaasequence ogas + WHERE ta.aa_sequence_id = ogas.aa_sequence_id; + + + + diff --git a/Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql b/Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql new file mode 100644 
index 0000000000..28ffcf3b4f --- /dev/null +++ b/Model/lib/psql/webready/comparative/LoadPathwaysGeneTable.psql @@ -0,0 +1,58 @@ +/* ATTENTION: This script is run using a custom workflow step class */ +/* This accommodates the requirement to retain an empty table on undo */ + +TRUNCATE TABLE :SCHEMA.PathwaysGeneTable; + +DO $$ + DECLARE org record:PLPGSQL_DELIM + BEGIN + FOR org IN (SELECT DISTINCT abbrev FROM apidb.organism) + LOOP + INSERT INTO :SCHEMA.PathwaysGeneTable ( + SELECT t2.*, current_timestamp AS modification_date FROM ( + SELECT DISTINCT + gene_source_id + , pathway_source_id + , pathway_name + , count(reaction_source_id) AS reactions + , enzyme + , expasy_url + , pathway_source + , exact_match + , project_id + , org_abbrev + FROM ( + SELECT DISTINCT + tp.gene_source_id + , tp.project_id + , tp.pathway_source_id + , tp.pathway_name + , tp.org_abbrev + , pr.reaction_source_id + , pr.enzyme + , pr.expasy_url + , tp.pathway_source + , CASE MAX(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match + FROM :SCHEMA.TranscriptPathway tp + , apidbtuning.PathwayAttributes pa + , apidbtuning.PathwayCompounds pc + , apidbtuning.PathwayReactions pr + WHERE tp.pathway_id = pa.pathway_id + AND pc.pathway_id = pa.pathway_id + AND pr.reaction_id = pc.reaction_id + AND pr.ext_db_name = pc.ext_db_name + AND tp.ec_number_pathway = pr.enzyme + AND tp.wildcard_count_gene <= tp.wildcard_count_pathway + AND pr.enzyme != '-.-.-.-' + AND tp.org_abbrev = org.abbrev + GROUP BY tp.gene_source_id, tp.project_id, tp.org_abbrev, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source + ) t + GROUP BY gene_source_id, project_id, org_abbrev, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match + ) t2 + ORDER BY pathway_source, lower(pathway_name) + ):PLPGSQL_DELIM + COMMIT:PLPGSQL_DELIM + END LOOP:PLPGSQL_DELIM + END:PLPGSQL_DELIM +$$ LANGUAGE PLPGSQL; + + diff --git 
a/Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql b/Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql new file mode 100644 index 0000000000..afd718fb47 --- /dev/null +++ b/Model/lib/psql/webready/comparative/LoadTranscriptPathway.psql @@ -0,0 +1,174 @@ +/* ATTENTION: This script is run using a custom workflow step class */ +/* This accommodates the requirement to retain empty TranscriptEC and TranscriptPathway tables on undo */ + +/* STEP 1: Make sure temp tables have been dropped before starting */ + +DROP TABLE IF EXISTS :SCHEMA.TranscriptEcUniverse; +DROP TABLE IF EXISTS :SCHEMA.PathwayEcUniverse; +DROP TABLE IF EXISTS :SCHEMA.TranscriptPathwayEcMatch; + +-- Just to be safe add these here +TRUNCATE TABLE :SCHEMA.TranscriptPathway; +TRUNCATE TABLE :SCHEMA.TranscriptEC; + + +/* STEP 2: Load into the TranscriptEC table. This must be done here so that we capture ortho-derived EC numbers*/ + +/* ATTENTION: this step loads into an empty partitioned table created in the orgSpecific graph */ + +DO $$ + DECLARE org RECORD:PLPGSQL_DELIM + BEGIN + FOR org IN (SELECT DISTINCT taxon_id, abbrev from apidb.organism) + LOOP + INSERT INTO :SCHEMA.TranscriptEc ( + SELECT DISTINCT + ta.source_id + , ta.gene_source_id + , ec.enzyme_class_id + , ec.ec_number + , ec.ec_number_1 + , ec.ec_number_2 + , ec.ec_number_3 + , ec.ec_number_4 + , regexp_count(ec.ec_number, '-') as wildcard_count + , asec.evidence_code + , ta.project_id + , org.abbrev as org_abbrev + , current_timestamp as modification_date + FROM sres.EnzymeClass ec + , dots.AaSequenceEnzymeClass asec + , :SCHEMA.transcriptattributes ta + WHERE asec.aa_sequence_id = ta.aa_sequence_id + AND asec.enzyme_class_id = ec.enzyme_class_id + AND ta.org_abbrev = org.abbrev + ):PLPGSQL_DELIM + COMMIT:PLPGSQL_DELIM + END LOOP:PLPGSQL_DELIM + END:PLPGSQL_DELIM +$$ LANGUAGE PLPGSQL; + + +/* STEP 3: Extracts the distinct EC numbers from TranscriptEC */ +/* This represents the "universe" of EC numbers associated 
to transcripts */ +/* Temp table, will be dropped */ + +CREATE TABLE :SCHEMA.TranscriptEcUniverse as ( + SELECT DISTINCT + enzyme_class_id + , ec_number + , ec_number_1 + , ec_number_2 + , ec_number_3 + , ec_number_4 + , wildcard_count + FROM :SCHEMA.TranscriptEc +); + +/* STEP 4: Extract the distinct EC number from PathwayEC */ +/* This represents the "universe" of EC numbers associated to pathways */ +/* Temp table, will be dropped */ + +CREATE TABLE :SCHEMA.PathwayEcUniverse as ( + SELECT DISTINCT + enzyme_class_id + , ec_number + , ec_number_1 + , ec_number_2 + , ec_number_3 + , ec_number_4 + , wildcard_count + FROM :SCHEMA.PathwayEc +); + + +/* STEP 5: Match EC numbers from the transcript universe and EC numbers from the pathway universe */ +/* Use the universe tables to avoid redundancy */ +/* Temp table, will be dropped */ + +CREATE TABLE :SCHEMA.TranscriptPathwayEcMatch as ( + SELECT DISTINCT + teu.enzyme_class_id AS transcript_enzyme_class_id + , peu.enzyme_class_id AS pathway_enzyme_class_id + , teu.wildcard_count AS wildcard_count_transcript + , peu.wildcard_count AS wildcard_count_pathway + , teu.ec_number AS ec_number_transcript + , peu.ec_number AS ec_number_pathway + FROM :SCHEMA.TranscriptEcUniverse teu + , :SCHEMA.PathwayEcUniverse peu + + -- this part does ec number expansion using the individual digits to avoid slow like syntax + WHERE (teu.ec_number_1 = peu.ec_number_1 or teu.ec_number_1 is null or peu.ec_number_1 is null) + AND (teu.ec_number_2 = peu.ec_number_2 or teu.ec_number_2 is null or peu.ec_number_2 is null) + AND (teu.ec_number_3 = peu.ec_number_3 or teu.ec_number_3 is null or peu.ec_number_3 is null) + AND (teu.ec_number_4 = peu.ec_number_4 or teu.ec_number_4 is null or peu.ec_number_4 is null) +); + + +/* STEP 6: Map the matched EC numbers back to both pathways and transcripts */ +/* ATTENTION: this step loads into an empty partitioned table created in the orgSpecific graph */ + +/* This is the equivalent of the old TranscriptPathway 
tuning table */ + +DO $$ + DECLARE org RECORD:PLPGSQL_DELIM + BEGIN + FOR org IN (SELECT DISTINCT taxon_id, abbrev FROM apidb.organism) + LOOP + INSERT INTO :SCHEMA.transcriptpathway ( + SELECT DISTINCT + + -- gene info + ta.source_id + , ta.gene_source_id + + -- pathway info + , pa.source_id AS pathway_source_id + , pa.name AS pathway_name + , pa.pathway_id + , pa.pathway_source + , pec.external_database_release_id + + -- info about match + , tpem.ec_number_transcript AS ec_number_gene + , tpem.wildcard_count_transcript AS wildcard_count_gene + , tpem.ec_number_pathway + , tpem.wildcard_count_pathway + , CASE WHEN tpem.ec_number_pathway = tpem.ec_number_transcript + THEN 1 + ELSE 0 END AS exact_match + , CASE WHEN tpem.wildcard_count_pathway + tpem.wildcard_count_transcript = 0 + THEN 1 + ELSE 0 END AS complete_ec + + -- for partitioning + , ta.project_id + , org.abbrev AS org_abbrev + , current_timestamp AS modification_date + FROM :SCHEMA.TranscriptPathwayEcMatch tpem + , :SCHEMA.PathwayEc pec + , :SCHEMA.PathwayAttributes pa + , :SCHEMA.TranscriptAttributes ta + , :SCHEMA.TranscriptEc tec + WHERE tpem.ec_number_transcript = tec.ec_number + AND tpem.ec_number_pathway = pec.ec_number + AND pa.pathway_id = pec.pathway_id + AND ta.source_id = tec.source_id + AND tec.org_abbrev = org.abbrev + AND ta.org_abbrev = org.abbrev + -- JB: It is unclear what the intention here was. 
This will not remove any rows and shouldn't + -- AND ( + -- (ta.orthomcl_name IS NULL AND tec.evidence_code != 'OrthoMCLDerived') + -- OR ta.orthomcl_name IS NOT NULL + -- ) + ):PLPGSQL_DELIM + COMMIT:PLPGSQL_DELIM + END LOOP:PLPGSQL_DELIM + END:PLPGSQL_DELIM +$$ LANGUAGE PLPGSQL; + + +/* STEP 7: Delete temp tables */ +DROP TABLE :SCHEMA.TranscriptEcUniverse; +DROP TABLE :SCHEMA.PathwayEcUniverse; +DROP TABLE :SCHEMA.TranscriptPathwayEcMatch; diff --git a/Model/lib/psql/webready/comparative/PhyleticPattern.psql b/Model/lib/psql/webready/comparative/PhyleticPattern.psql index 6b5f20ca3c..1fad7add7c 100644 --- a/Model/lib/psql/webready/comparative/PhyleticPattern.psql +++ b/Model/lib/psql/webready/comparative/PhyleticPattern.psql @@ -1,6 +1,6 @@ -CREATE TABLE PhyleticPattern AS +CREATE TABLE :SCHEMA.PhyleticPattern AS (SELECT actual.group_name, actual.alveolata as alveolata_actual, total.alveolata as alveolata_total, diff --git a/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql b/Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.psql similarity index 100% rename from Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.sql rename to Model/lib/psql/webready/comparative/ProteinSequenceGroup_ix.psql diff --git a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql deleted file mode 100644 index fcb9f3a012..0000000000 --- a/Model/lib/psql/webready/comparative/TranscriptOrthologGroup.psql +++ /dev/null @@ -1,8 +0,0 @@ - create table :SCHEMA.TranscriptOrthologGroup as - SELECT ta.source_id AS source_id, - ogas.group_id - FROM :SCHEMA.transcriptattributes ta, - apidb.orthologgroupaasequence ogas - WHERE ta.aa_sequence_id = ogas.aa_sequence_id; - alter table :SCHEMA.TranscriptOrthologGroup - add constraint TranscriptOrthologGroup_pk primary key (source_id); diff --git a/Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql 
b/Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql new file mode 100644 index 0000000000..9a50c36b62 --- /dev/null +++ b/Model/lib/psql/webready/comparative/Undo_LoadOrthologTables.psql @@ -0,0 +1,4 @@ +/* This script truncates the tables loaded by LoadOrthologTables */ +/* The tables are not dropped and the partitions are retained */ +Truncate table :SCHEMA.GeneOrthologGroup; +Truncate table :SCHEMA.TranscriptOrthologGroup; diff --git a/Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql b/Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql new file mode 100644 index 0000000000..6c838ced50 --- /dev/null +++ b/Model/lib/psql/webready/comparative/Undo_LoadPathwaysGeneTable.psql @@ -0,0 +1,3 @@ +/* This script truncates the tables loaded by LoadPathwaysGeneTable */ + +TRUNCATE TABLE :SCHEMA.PathwaysGeneTable; diff --git a/Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql b/Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql new file mode 100644 index 0000000000..329c22d305 --- /dev/null +++ b/Model/lib/psql/webready/comparative/Undo_LoadTranscriptPathway.psql @@ -0,0 +1,5 @@ +/* This script truncates the tables loaded by LoadTranscriptPathway */ +/* The tables are not dropped and the partitions are retained */ + +TRUNCATE TABLE :SCHEMA.TranscriptPathway; +TRUNCATE TABLE :SCHEMA.TranscriptEC; diff --git a/Model/lib/psql/webready/global/PathwayEC.psql b/Model/lib/psql/webready/global/PathwayEC.psql new file mode 100644 index 0000000000..6a688da186 --- /dev/null +++ b/Model/lib/psql/webready/global/PathwayEC.psql @@ -0,0 +1,31 @@ + drop table if exists :SCHEMA.PathwayEC; + + /* this table represents the universe of EC numbers that are associated with enzyme nodes in metabolic pathways*/ + + CREATE TABLE :SCHEMA.PathwayEC as + + SELECT DISTINCT ec.enzyme_class_id -- use this for joining back to pathways later + , ec.ec_number -- useful for quick exact matches + , ec.ec_number_1 --have the 4 
EC number components separately avoids lots of like statements later + , ec.ec_number_2 + , ec.ec_number_3 + , ec.ec_number_4 + , regexp_count(ec.ec_number, '-') as wildcard_count -- how many of the enzyme number positions are unknown + , pn.pathway_id + , p.external_database_release_id + + FROM sres.EnzymeClass ec + , sres.OntologyTerm ot + , sres.PathwayNode pn + , sres.Pathway p + -- find all pathway nodes representing enzymes + + WHERE pn.pathway_node_type_id = ot.ontology_term_id + AND ot.name = 'enzyme' + AND p.pathway_id = pn.pathway_id + -- we don't want the root or this gets matched to everything! + AND pn.display_label != '-.-.-.-' + -- now we can get the enzyme details for the enzyme pathway nodes + AND ec.enzyme_class_id = pn.row_id + ; + diff --git a/Model/lib/psql/webready/global/PathwayEC_ix.psql b/Model/lib/psql/webready/global/PathwayEC_ix.psql new file mode 100644 index 0000000000..0c652289a8 --- /dev/null +++ b/Model/lib/psql/webready/global/PathwayEC_ix.psql @@ -0,0 +1,6 @@ +CREATE INDEX PathwayEC_1_idx ON :SCHEMA.PathwayEC (pathway_id, ec_number, external_database_release_id) + ; + + +CREATE INDEX PathwayEC_2_idx ON :SCHEMA.PathwayEC (ec_number_1, ec_number_2, ec_number_3, ec_number_4, enzyme_class_id, ec_number, wildcard_count) + ; diff --git a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql index 9f8295b7cc..7ee49f52cd 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneAttributes.psql @@ -12,7 +12,7 @@ , sequence_id , gene_name AS name , COALESCE(aggregates.product, aggregates.transcript_product) as old_product - -- , COALESCE(gp.product, 'unspecified product') as product + , COALESCE(gp.product, 'unspecified product') as product , 'FIX ME' as product , gene_type , gene_ebi_biotype @@ -88,9 +88,8 @@ OR d.name like '%_dbxref_uniprot_from_annotation_RSRC') ) t GROUP BY na_feature_id - ) uniprot ON 
ta.gene_na_feature_id = uniprot.na_feature_id --- LEFT JOIN :ORG_ABBREVGeneProduct gp ON ta.gene_source_id = gp.source_id - WHERE ta.org_abbrev = ':ORG_ABBREV' + ) uniprot ON ta.gene_na_feature_id = uniprot.na_feature_id and ta.org_abbrev = ':ORG_ABBREV' + LEFT JOIN :SCHEMA.GeneProduct gp ON ta.gene_source_id = gp.source_id and gp.org_abbrev = ':ORG_ABBREV' ORDER BY ta.gene_source_id :DECLARE_PARTITION; diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql deleted file mode 100644 index b8a8e7dbcf..0000000000 --- a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats.psql +++ /dev/null @@ -1,25 +0,0 @@ -:CREATE_AND_POPULATE - - - CREATE TABLE GeneIntJuncStats AS - WITH org_tot AS ( - SELECT - gs.organism, - min(gij.total_unique) as min_annot_score, PERCENTILE_cont(0.005) within group (order by gij.total_unique asc) as perc005_annot_score, - PERCENTILE_cont(0.01) within group (order by gij.total_unique asc) as perc01_annot_score, - min(gij.percent_max) as min_annot_percent_max, PERCENTILE_cont(0.0001) within group (order by gij.percent_max asc) as perc0001_annot_percent_max, - PERCENTILE_cont(0.0005) within group (order by gij.percent_max asc) as perc0005_annot_percent_max, - floor(max(gij.segment_end - gij.segment_start) * 1.25) as max_intron_length - FROM geneintronjunction gij, genomicseqattributes gs - WHERE gs.na_sequence_id = gij.na_sequence_id - AND gij.annotated_intron = 'Yes' - GROUP BY gs.organism - ) - SELECT gs.na_sequence_id, gs.source_id, ot.* - FROM genomicseqattributes gs, org_tot ot - WHERE gs.organism = ot.organism - AND gs.na_sequence_id in (SELECT DISTINCT na_sequence_id FROM apidb.intronjunction) - - -:DECLARE_PARTITION; - diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql deleted file mode 100644 index 89d3004344..0000000000 --- 
a/Model/lib/psql/webready/orgSpecific/GeneIntJuncStats_ix.psql +++ /dev/null @@ -1,3 +0,0 @@ - create index GeneIntJuncStat_ix on :SCHEMA.GeneIntJuncStats (na_sequence_id) - ; - diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql index 9cf983cd01..39837511c8 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction.psql @@ -1,223 +1,192 @@ - - - CREATE UNLOGGED TABLE PanIOgij AS - SELECT i.protocol_app_id, i.protocol_app_node_id as input_pan_id, o.protocol_app_node_id as output_pan_id - FROM study.output o, study.input i - WHERE - o.protocol_app_node_id in ( - SELECT DISTINCT protocol_app_node_id - FROM study.protocolappnode where name like '%junctions%' or name like '%htseq%' - ) - AND o.protocol_app_id = i.protocol_app_id - - ; - - - - CREATE UNLOGGED TABLE annotgij ( - na_sequence_id, - start_min, - end_max, - is_reversed, - feature_type - ) AS - SELECT il.na_sequence_id, il.start_min,il.end_max, il.is_reversed, 'Intron' as feature_type - FROM apidb.IntronLocation il - GROUP by il.na_sequence_id, il.start_min,il.end_max,il.is_reversed - - ; - - - - CREATE UNIQUE INDEX :ORG_ABBREV_annottmpnew_pk_ix ON annotgij (na_sequence_id,start_min,end_max,is_reversed,feature_type) - - - ; - - - - create table GeneIdLocGIJ ( - na_sequence_id NUMERIC(10), - start_min NUMERIC, - is_reversed NUMERIC, - end_max NUMERIC, - na_feature_id NUMERIC(10), - source_id varchar(100), - total_expression NUMERIC - ) - - ; - - - - CREATE UNIQUE INDEX :ORG_ABBREV_gnattidloc_pk_ix ON GeneIdLocGIJ (na_sequence_id,start_min,is_reversed,end_max,na_feature_id,source_id,total_expression) - - - ; - - - - DO $$ - DECLARE - idlist RECORD; - BEGIN - FOR idlist IN ( SELECT DISTINCT na_sequence_id FROM apidb.intronjunction) - LOOP - INSERT INTO GeneIdLocGIJ ( - SELECT 
gf.na_sequence_id,l.start_min,l.is_reversed,l.end_max,gf.na_feature_id,gf.source_id, - round(sum(nafe.value)::NUMERIC,2) as total_expression - FROM dots.genefeature gf, dots.nalocation l, namemappinggij je, results.nafeatureexpression nafe - WHERE gf.na_sequence_id = idlist.na_sequence_id - AND l.na_feature_id = gf.na_feature_id - AND gf.na_feature_id = nafe.na_feature_id - AND nafe.protocol_app_node_id = je.exp_pan_id - GROUP BY gf.na_sequence_id,l.start_min,l.is_reversed,l.end_max,gf.na_feature_id,gf.source_id - ); - commit; - END LOOP; - END; - $$ LANGUAGE PLPGSQL; - - ; - - - - create index :ORG_ABBREV_gnidloc_nafid_ix on GeneIdLocGIJ (na_feature_id) +drop table if exists :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp; + +drop table if exists :SCHEMA.:ORG_ABBREVGIJtmp; + +/* + Distinct Annotated Intron Locations +*/ +CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp ( + na_sequence_id, + start_min, + end_max, + is_reversed, + feature_type +) AS + SELECT il.na_sequence_id + , il.start_min + , il.end_max + , il.is_reversed + , 'Intron' as feature_type + FROM apidb.IntronLocation il + , dots.nasequence s + WHERE il.na_sequence_id = s.na_sequence_id + AND s.taxon_id = :TAXON_ID + GROUP by il.na_sequence_id + , il.start_min + , il.end_max + , il.is_reversed + + ; + +CREATE UNIQUE INDEX :ORG_ABBREV_annottmpnew_pk_ix ON :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp (na_sequence_id,start_min,end_max,is_reversed,feature_type) ; - - - create UNLOGGED table GIJtmp ( - NA_SEQUENCE_ID NUMERIC(10), - SEQUENCE_SOURCE_ID VARCHAR(100), - SEGMENT_START NUMERIC, - SEGMENT_END NUMERIC, - TOTAL_UNIQUE NUMERIC, - TOTAL_ISRPM NUMERIC, - IS_REVERSED NUMERIC(1), - INTRON_FEATURE_ID VARCHAR(200), - MATCHES_GENE_STRAND NUMERIC, - GENE_SOURCE_ID VARCHAR(100), - GENE_NA_FEATURE_ID NUMERIC, - ANNOTATED_INTRON VARCHAR(10) +/* + this table does the heavy lifting for gene intron junction calculations +*/ +create UNLOGGED table :SCHEMA.:ORG_ABBREVGIJtmp ( + NA_SEQUENCE_ID 
NUMERIC(10), + SEQUENCE_SOURCE_ID VARCHAR(100), + SEGMENT_START NUMERIC, + SEGMENT_END NUMERIC, + TOTAL_UNIQUE NUMERIC, + TOTAL_ISRPM NUMERIC, + IS_REVERSED NUMERIC(1), + INTRON_FEATURE_ID VARCHAR(200), + MATCHES_GENE_STRAND NUMERIC, + GENE_SOURCE_ID VARCHAR(100), + GENE_NA_FEATURE_ID NUMERIC, + ANNOTATED_INTRON VARCHAR(10) +) + ; + + +DO $$ + DECLARE + iter_length numeric := 4999 :PLPGSQL_DELIM + i_first_pos numeric := 1 :PLPGSQL_DELIM + i_last_pos numeric := i_first_pos + iter_length :PLPGSQL_DELIM + idlist RECORD :PLPGSQL_DELIM + BEGIN + FOR idlist IN ( + SELECT na_sequence_id + , source_id as sequence_source_id + , length + , taxon_id + , CASE WHEN step_mult > 500000 THEN 500000 ELSE step_mult END as seq_step_mult + FROM ( + SELECT gs.na_sequence_id + , gs.source_id + , gs.length + , gs.taxon_id + , 25000 * (1 + floor(gs.length/count(*))) as step_mult + FROM apidb.intronjunction ij + , dots.nasequence gs + WHERE gs.na_sequence_id = ij.na_sequence_id + AND gs.taxon_id = :TAXON_ID + GROUP BY gs.na_sequence_id, gs.source_id, gs.length, gs.taxon_id ) - - ; - - - - DO $$ - DECLARE - iter_length numeric := 4999; - i_first_pos numeric := 1; - i_last_pos numeric := i_first_pos + iter_length; - idlist RECORD; - BEGIN - FOR idlist IN ( - SELECT na_sequence_id, length, taxon_id, CASE WHEN step_mult > 500000 THEN 500000 ELSE step_mult END as seq_step_mult - FROM ( - SELECT gs.na_sequence_id, gs.length, gs.taxon_id, 25000 * (1 + floor(gs.length/count(*))) as step_mult - FROM apidb.intronjunction ij, dots.nasequence gs - WHERE gs.na_sequence_id = ij.na_sequence_id - GROUP BY gs.na_sequence_id, gs.length, gs.taxon_id - ) t - ORDER BY taxon_id - ) - LOOP - iter_length := idlist.seq_step_mult; - i_first_pos := 1; - i_last_pos := i_first_pos + iter_length; - WHILE i_first_pos < idlist.length - LOOP - INSERT INTO GIJtmp - SELECT DISTINCT - junc.*, - CASE - WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed - THEN 1 - ELSE 0 - END as matches_gene_strand, - last_value(ga.source_id) over 
w1 as gene_source_id, - last_value(ga.na_feature_id) over w1 as gene_na_feature_id, - CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron - FROM ( - SELECT ij.na_sequence_id,seq.source_id as sequence_source_id,ij.segment_start,ij.segment_end, - sum(ij.unique_reads) as total_unique, round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm, - ij.is_reversed,seq.source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id - FROM apidb.intronjunction ij, namemappinggij je, dots.nasequence seq - WHERE ij.na_sequence_id = idlist.na_sequence_id - AND ij.segment_start between i_first_pos and i_last_pos - AND ij.na_sequence_id = seq.na_sequence_id - AND ij.unique_reads >= 1 - AND je.junctions_pan_id = ij.protocol_app_node_id - AND je.multiplier < 20 - GROUP BY ij.na_sequence_id,ij.segment_start,ij.segment_end, ij.is_reversed,seq.source_id - ) junc - LEFT JOIN GeneIdLocGIJ ga ON - junc.na_sequence_id = ga.na_sequence_id - AND junc.segment_start >= ga.start_min - AND junc.segment_end <= ga.end_max - AND junc.is_reversed = ga.is_reversed - LEFT JOIN annotgij ag ON - junc.na_sequence_id = ag.na_sequence_id - AND junc.segment_start = ag.start_min - AND junc.segment_end = ag.end_max - AND junc.is_reversed = ag.is_reversed - WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron') - WINDOW w1 AS ( - PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type - ORDER BY ga.total_expression ASC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) - ; - commit; - i_first_pos := i_last_pos + 1; - i_last_pos := i_first_pos + iter_length; - END LOOP; - END LOOP; - END; - $$ LANGUAGE PLPGSQL; - - ; - - - - create index :ORG_ABBREV_gijtmp_gnscid_ix on gijtmp (gene_source_id) - - ; + ) + LOOP + iter_length := idlist.seq_step_mult :PLPGSQL_DELIM + i_first_pos := 1 
:PLPGSQL_DELIM + i_last_pos := i_first_pos + iter_length :PLPGSQL_DELIM + WHILE i_first_pos < idlist.length + LOOP + INSERT INTO :SCHEMA.:ORG_ABBREVGIJtmp + SELECT DISTINCT + junc.*, + CASE + WHEN last_value(ga.is_reversed) over w1 = junc.is_reversed + THEN 1 + ELSE 0 + END as matches_gene_strand, + last_value(ga.source_id) over w1 as gene_source_id, + last_value(ga.na_feature_id) over w1 as gene_na_feature_id, + CASE ag.feature_type WHEN 'Intron' THEN 'Yes' ELSE 'No' END as annotated_intron + FROM ( + SELECT ij.na_sequence_id + , idlist.sequence_source_id + , ij.segment_start + , ij.segment_end + , sum(ij.unique_reads) as total_unique + , round(sum(ij.unique_reads * je.multiplier),2) as total_isrpm + , ij.is_reversed + , idlist.sequence_source_id || '_' || ij.segment_start || '_' || ij.segment_end || '_' || ij.is_reversed as intron_feature_id + FROM apidb.intronjunction ij + , :SCHEMA.JunctionToCoverageProfileMapping je + WHERE ij.na_sequence_id = idlist.na_sequence_id + AND ij.segment_start between i_first_pos and i_last_pos + AND ij.unique_reads >= 1 + AND je.junctions_pan_id = ij.protocol_app_node_id + AND je.multiplier < 20 + AND je.org_abbrev = ':ORG_ABBREV' + GROUP BY ij.na_sequence_id + , ij.segment_start + , ij.segment_end + , ij.is_reversed + , idlist.sequence_source_id + ) junc + LEFT JOIN :SCHEMA.JunctionGeneLocation ga + ON junc.na_sequence_id = ga.na_sequence_id + AND ga.org_abbrev = ':ORG_ABBREV' + AND junc.segment_start >= ga.start_min + AND junc.segment_end <= ga.end_max + AND junc.is_reversed = ga.is_reversed + LEFT JOIN :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp ag + ON junc.na_sequence_id = ag.na_sequence_id + AND junc.segment_start = ag.start_min + AND junc.segment_end = ag.end_max + AND junc.is_reversed = ag.is_reversed + WHERE (junc.total_unique >= 1 or ag.feature_type = 'Intron') + WINDOW w1 AS ( + PARTITION BY junc.na_sequence_id,junc.sequence_source_id,junc.segment_start,junc.segment_end, junc.is_reversed, 
junc.intron_feature_id,junc.total_unique, junc.total_isrpm,ag.feature_type + ORDER BY ga.total_expression ASC + ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) + :PLPGSQL_DELIM + commit :PLPGSQL_DELIM + i_first_pos := i_last_pos + 1 :PLPGSQL_DELIM + i_last_pos := i_first_pos + iter_length :PLPGSQL_DELIM + END LOOP :PLPGSQL_DELIM + END LOOP :PLPGSQL_DELIM + END :PLPGSQL_DELIM +$$ LANGUAGE PLPGSQL :PLPGSQL_DELIM + +; + + +create index :ORG_ABBREV_gijtmp_gnscid_ix on :SCHEMA.:ORG_ABBREVGIJtmp (gene_source_id) + + ; :CREATE_AND_POPULATE - - - CREATE TABLE GeneIntronJunction AS - SELECT - junc.*, CASE WHEN maxv.gene_source_id is not null and maxv.max_isrpm > 0 THEN round((junc.total_isrpm / maxv.max_isrpm) * 100,2) ELSE null END as percent_max, - CASE WHEN maxv.gene_source_id is not null THEN 1 ELSE 0 END as contained, - CAST (null as numeric(10)) as taxon_id, - cast (null as numeric(10)) as upstream_gene_id, - cast (null as numeric) as upstream_distance, - cast (null as numeric(10)) as downstream_gene_id, - cast (null as numeric) as downstream_distance - FROM - gijtmp junc LEFT JOIN - ( - SELECT gene_source_id,max(total_unique) as max_unique, max(total_isrpm) as max_isrpm - FROM gijtmp - WHERE gene_source_id is not null - GROUP BY gene_source_id - ) maxv ON junc.gene_source_id = maxv.gene_source_id - - -:DECLARE_PARTITION; - - - - UPDATE GeneIntronJunction gij - SET taxon_id - = (SELECT taxon_id - FROM dots.NaSequence - WHERE na_sequence_id = gij.na_sequence_id) - - ; - + SELECT + junc.* + , CASE + WHEN maxv.gene_source_id is not null and maxv.max_isrpm > 0 + THEN round((junc.total_isrpm / maxv.max_isrpm) * 100,2) + ELSE null + END as percent_max + , CASE + WHEN maxv.gene_source_id is not null + THEN 1 + ELSE 0 + END as contained + , :TAXON_ID as taxon_id +-- , CAST (null as numeric(10)) as upstream_gene_id +-- , CAST (null as numeric) as upstream_distance +-- , CAST (null as numeric(10)) as downstream_gene_id +-- , CAST (null as numeric) as 
downstream_distance + , ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev + , current_timestamp as modification_date + FROM + :SCHEMA.:ORG_ABBREVGIJtmp junc LEFT JOIN + ( + SELECT gene_source_id,max(total_unique) as max_unique, max(total_isrpm) as max_isrpm + FROM :SCHEMA.:ORG_ABBREVGIJtmp + WHERE gene_source_id is not null + GROUP BY gene_source_id + ) maxv ON junc.gene_source_id = maxv.gene_source_id +:DECLARE_PARTITION + ; + + + +drop table :SCHEMA.:ORG_ABBREVDistinctAnnotatedIntronsTmp + ; + +drop table :SCHEMA.:ORG_ABBREVGIJtmp + ; diff --git a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql index ad0f7593c5..22bd603691 100644 --- a/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql +++ b/Model/lib/psql/webready/orgSpecific/GeneIntronJunction_ix.psql @@ -1,13 +1,12 @@ - create index gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed) - ; - - create index gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id) - ; +create index gijnew_loc_ix on :SCHEMA.GeneIntronJunction (na_sequence_id,segment_start,segment_end,is_reversed) + ; - create index gijnew_txnloc_ix - on :SCHEMA.GeneIntronJunction - (taxon_id, na_sequence_id, segment_start, segment_end, is_reversed, - total_unique, total_isrpm, annotated_intron) - - ; +create index gijnew_gnscid_ix on :SCHEMA.GeneIntronJunction (intron_feature_id) + ; +create index gijnew_txnloc_ix + on :SCHEMA.GeneIntronJunction + (taxon_id, na_sequence_id, segment_start, segment_end, is_reversed, + total_unique, total_isrpm, annotated_intron) + + ; diff --git a/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql deleted file mode 100644 index 0025d7d161..0000000000 --- a/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ.psql +++ /dev/null @@ -1,47 +0,0 @@ -:CREATE_AND_POPULATE - - - CREATE TABLE 
GeneMaxIntronGIJ ( - protocol_app_node_id NUMERIC(10), - gene_source_id VARCHAR(200), - max_unique NUMERIC, - max_isrpm NUMERIC, - sum_unique NUMERIC, - sum_isrpm NUMERIC, - avg_unique NUMERIC, - avg_isrpm NUMERIC - ) - - -:DECLARE_PARTITION; - - - - DO $$ - DECLARE - idlist RECORD; - BEGIN - FOR idlist IN ( - SELECT DISTINCT na_sequence_id - FROM GeneIdLocGIJ - ) - LOOP - INSERT INTO GeneMaxIntronGIJ ( - SELECT j.protocol_app_node_id, ga.source_id, max(unique_reads) as max_unique, max(round(j.unique_reads * mult.multiplier,2)) as max_isrpm, - sum(unique_reads) as sum_unique, sum(round(j.unique_reads * mult.multiplier,2)) as sum_isrpm, avg(unique_reads) as avg_unique, avg(round(j.unique_reads * mult.multiplier,2)) as avg_isrpm - FROM apidb.intronjunction j, GeneIdLocGIJ ga, namemappinggij mult - WHERE ga.na_sequence_id = idlist.na_sequence_id - AND ga.na_sequence_id = j.na_sequence_id - AND ga.start_min <= j.segment_start - AND ga.end_max >= j.segment_end - AND ga.is_reversed = j.is_reversed - AND j.protocol_app_node_id = mult.junctions_pan_id - GROUP BY j.protocol_app_node_id, ga.source_id - ); - commit; - END LOOP; - END; - $$ LANGUAGE PLPGSQL; - - ; - diff --git a/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql deleted file mode 100644 index 9bd71417ae..0000000000 --- a/Model/lib/psql/webready/orgSpecific/GeneMaxIntronGIJ_ix.psql +++ /dev/null @@ -1,3 +0,0 @@ - CREATE INDEX GnMxIntGIJ_ix on :SCHEMA.GeneMaxIntronGIJ (gene_source_id,protocol_app_node_id) - ; - diff --git a/Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql new file mode 100644 index 0000000000..593fbbaee4 --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction.psql @@ -0,0 +1,69 @@ +drop table if exists :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp + ; + +create unlogged table :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp ( + protocol_app_node_id 
NUMERIC(10), + gene_source_id VARCHAR(200), + max_unique NUMERIC, + max_isrpm NUMERIC, + sum_unique NUMERIC, + sum_isrpm NUMERIC, + avg_unique NUMERIC, + avg_isrpm NUMERIC, + project_id VARCHAR(20), + org_abbrev VARCHAR(20), + modification_date TIMESTAMP + ) +; + + +DO $$ + DECLARE + idlist RECORD :PLPGSQL_DELIM + BEGIN + FOR idlist IN ( + SELECT DISTINCT na_sequence_id + FROM :SCHEMA.JunctionGeneLocation + WHERE org_abbrev = ':ORG_ABBREV' + ) + LOOP + INSERT INTO :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp ( + SELECT j.protocol_app_node_id + , ga.source_id + , max(unique_reads) as max_unique + , max(round(j.unique_reads * mult.multiplier,2)) as max_isrpm + , sum(unique_reads) as sum_unique + , sum(round(j.unique_reads * mult.multiplier,2)) as sum_isrpm + , avg(unique_reads) as avg_unique + , avg(round(j.unique_reads * mult.multiplier,2)) as avg_isrpm + , ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev + , current_timestamp as modification_date + FROM apidb.intronjunction j + , :SCHEMA.JunctionGeneLocation ga + , :SCHEMA.JunctionToCoverageProfileMapping mult + WHERE ga.na_sequence_id = idlist.na_sequence_id + AND ga.na_sequence_id = j.na_sequence_id + AND ga.start_min <= j.segment_start + AND ga.end_max >= j.segment_end + AND ga.is_reversed = j.is_reversed + AND j.protocol_app_node_id = mult.junctions_pan_id + AND ga.org_abbrev = ':ORG_ABBREV' + AND mult.org_abbrev = ':ORG_ABBREV' + GROUP BY j.protocol_app_node_id, ga.source_id + ) :PLPGSQL_DELIM + commit :PLPGSQL_DELIM + END LOOP :PLPGSQL_DELIM + END :PLPGSQL_DELIM +$$ LANGUAGE PLPGSQL :PLPGSQL_DELIM + ; + + + +:CREATE_AND_POPULATE +select * from :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp +:DECLARE_PARTITION; + + +drop table :SCHEMA.:ORG_ABBREVGeneMaxIntronGIJTmp + ; diff --git a/Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql new file mode 100644 index 0000000000..8915a4a6ea --- /dev/null +++ 
b/Model/lib/psql/webready/orgSpecific/GeneMaxJunction_ix.psql @@ -0,0 +1,3 @@ +CREATE INDEX GnMxIntGIJ_ix on :SCHEMA.GeneMaxJunction (gene_source_id,protocol_app_node_id) + ; + diff --git a/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql new file mode 100644 index 0000000000..0876f0b3a0 --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup.psql @@ -0,0 +1,21 @@ +DROP TABLE if exists :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp; +/* + +ATTENTION: This table is empty. We will populate it in the comparative graph. +We are creating it here so that it is partitioned. +*/ + +CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp ( + gene_id VARCHAR(80), + group_id VARCHAR(16), + project_id varchar(20), + org_abbrev varchar(20), + modification_date timestamp +); + +:CREATE_AND_POPULATE +SELECT ogt.* from :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp ogt +:DECLARE_PARTITION; + + +DROP TABLE :SCHEMA.:ORG_ABBREVGeneOrthologGroupTmp; diff --git a/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql new file mode 100644 index 0000000000..c7ce2fd90a --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/GeneOrthologGroup_ix.psql @@ -0,0 +1,2 @@ +alter table :SCHEMA.GeneOrthologGroup + ADD CONSTRAINT GeneOrthologGroup_pk PRIMARY KEY (gene_id, group_id); diff --git a/Model/lib/psql/webready/orgSpecific/GeneProduct.psql b/Model/lib/psql/webready/orgSpecific/GeneProduct.psql new file mode 100644 index 0000000000..ed8670eda5 --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/GeneProduct.psql @@ -0,0 +1,130 @@ +drop table if exists :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp; /* leftover from a failed run would break the create below */ +create unlogged table :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp as +(select gf.na_feature_id + , gf.source_id + , gf.product + from dots.genefeature gf + , dots.nasequence nas + where gf.na_sequence_id = nas.na_sequence_id + and nas.taxon_id = :TAXON_ID +) +; + + +:CREATE_AND_POPULATE + with gfp_preferred + 
as (select source_id, + substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product, + count(*) as value_count + from (select distinct gf.source_id, gfp.product + from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, apidb.GeneFeatureProduct gfp + where gfp.na_feature_id = gf.na_feature_id + and gfp.is_preferred = 1) tmp + group by source_id), + gfp_any + as (select source_id, + substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product, + count(*) as value_count + from (select distinct gf.source_id, gfp.product + from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, apidb.GeneFeatureProduct gfp + where gfp.na_feature_id = gf.na_feature_id) tmp + group by source_id), + tp_preferred + as (select source_id, + substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product, + count(*) as value_count + from (select distinct gf.source_id, tp.product + from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, dots.Transcript t, apidb.TranscriptProduct tp + where t.parent_id = gf.na_feature_id + and tp.na_feature_id = t.na_feature_id + and tp.is_preferred = 1) tmp + group by source_id), + gf_product + as (select source_id, product + from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf + where product is not null), + tp_any + as (select source_id, + substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product, + count(*) as value_count + from (select distinct gf.source_id, tp.product + from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, dots.Transcript t, apidb.TranscriptProduct tp + where t.parent_id = gf.na_feature_id + and tp.na_feature_id = t.na_feature_id) tmp + group by source_id), + t_product + as (select source_id, + substr(STRING_AGG(product, ', ' order by product), 1, 4000) as product, + count(*) as value_count + from (select gf.source_id, t.product + from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf, dots.Transcript t + where t.parent_id = gf.na_feature_id + and t.product is not null) tmp + group by source_id) + select gf.source_id, + 
coalesce(gfp_preferred.product, gfp_any.product, tp_preferred.product, + gf.product, tp_any.product, t_product.product) + as product, + case + when gfp_preferred.product is not null + then gfp_preferred.value_count + when gfp_any.product is not null + then gfp_any.value_count + when tp_preferred.product is not null + then tp_preferred.value_count + when gf.product is not null + then 1 + when tp_any.product is not null + then tp_any.value_count + when t_product.product is not null + then t_product.value_count + else 0 -- 'unspecified product' + end + as value_count, + case + when gfp_preferred.product is not null + then 1 + when gfp_any.product is not null + then 2 + when tp_preferred.product is not null + then 3 + when gf.product is not null + then 4 + when tp_any.product is not null + then 5 + when t_product.product is not null + then 6 + else 7 + end + as source_rule, + ':PROJECT_ID' as project_id, + ':ORG_ABBREV' as org_abbrev, + current_timestamp as modification_date + from :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp gf + LEFT JOIN gfp_preferred ON gf.source_id = gfp_preferred.source_id + LEFT JOIN gfp_any ON gf.source_id = gfp_any.source_id + LEFT JOIN tp_preferred ON gf.source_id = tp_preferred.source_id + LEFT JOIN gf_product ON gf.source_id = gf_product.source_id + LEFT JOIN tp_any ON gf.source_id = tp_any.source_id + LEFT JOIN t_product ON gf.source_id = t_product.source_id +:DECLARE_PARTITION; + + +/* + + +Dependent Step + gene/protein_id -> uniprot accession and product + +This step will assume it has all uniprot accessions assined to the protein and uniprot product name and have pfams in our hand + + + + + +*/ + +delete from :SCHEMA.GeneProduct where product is null and org_abbrev = ':ORG_ABBREV'; + +drop table :SCHEMA.:ORG_ABBREVGeneFeatureProductTmp +; diff --git a/Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql b/Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql new file mode 100644 index 0000000000..910d965521 --- /dev/null +++ 
b/Model/lib/psql/webready/orgSpecific/GeneProduct_ix.psql @@ -0,0 +1,2 @@ + CREATE INDEX GeneProduct_gene_idx ON :SCHEMA.GeneProduct (source_id, product) + ; diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql index 09f86475ac..e8f4481074 100644 --- a/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqAttributes.psql @@ -63,7 +63,7 @@ WHERE sequence.taxon_id = taxon.taxon_id AND sequence.sequence_ontology_id = so.ontology_term_id - AND (sequence.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0) + AND sequence.taxon_id = :TAXON_ID AND so.name IN ('random_sequence', 'chromosome', 'contig', 'supercontig','mitochondrial_chromosome','plastid_sequence','cloned_genomic','apicoplast_chromosome','maxicircle') ORDER BY organism, source_id diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql new file mode 100644 index 0000000000..b2a85443db --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats.psql @@ -0,0 +1,32 @@ +:CREATE_AND_POPULATE + WITH org_tot AS ( + SELECT + gs.organism + , min(gij.total_unique) as min_annot_score + , PERCENTILE_cont(0.005) within group (order by gij.total_unique asc) as perc005_annot_score + , PERCENTILE_cont(0.01) within group (order by gij.total_unique asc) as perc01_annot_score + , min(gij.percent_max) as min_annot_percent_max + , PERCENTILE_cont(0.0001) within group (order by gij.percent_max asc) as perc0001_annot_percent_max + , PERCENTILE_cont(0.0005) within group (order by gij.percent_max asc) as perc0005_annot_percent_max + , floor(max(gij.segment_end - gij.segment_start) * 1.25) as max_intron_length + FROM :SCHEMA.geneintronjunction gij + , :SCHEMA.genomicseqattributes gs + WHERE gs.na_sequence_id = gij.na_sequence_id + AND gij.annotated_intron = 'Yes' + AND 
gij.org_abbrev = ':ORG_ABBREV' + AND gs.org_abbrev = ':ORG_ABBREV' + GROUP BY gs.organism + ) + SELECT gs.na_sequence_id + , gs.source_id + , ot.* + , ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev + , current_timestamp as modification_date + FROM :SCHEMA.genomicseqattributes gs + , org_tot ot + WHERE gs.organism = ot.organism + AND gs.na_sequence_id in (SELECT DISTINCT ij.na_sequence_id FROM apidb.intronjunction ij, dots.nasequence s where ij.na_sequence_id = s.na_sequence_id and s.taxon_id = :TAXON_ID) + AND gs.org_abbrev = ':ORG_ABBREV' +:DECLARE_PARTITION + ; diff --git a/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql new file mode 100644 index 0000000000..73401d0fe4 --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/GenomicSeqJunctionStats_ix.psql @@ -0,0 +1,2 @@ +create index GeneIntJuncStat_ix on :SCHEMA.GenomicSeqJunctionStats (na_sequence_id) + ; diff --git a/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql index f9895a117e..1e8d386532 100644 --- a/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql +++ b/Model/lib/psql/webready/orgSpecific/IntronSupportLevel.psql @@ -1,119 +1,53 @@ :CREATE_AND_POPULATE - - - create table :ORG_ABBREVIntronSupportLevel as - SELECT * FROM ( - SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-high') as string_value - FROM ( - SELECT gene_source_id - , ontology_term - , case when count(*) = intron_count THEN 'All' - when count(*) = 0 THEN 'None' - else 'Any' end as string_value - FROM ( - SELECT gij.gene_source_id - , 'intron_junction' as ontology_term - , intronCount.intron_count - FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats - , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount - WHERE gij.gene_source_id = 
intronCount.source_id - and gij.na_sequence_id = stats.na_sequence_id - and gij.annotated_intron = 'Yes' - AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2 - AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END - AND (gij.contained = 0 or gij.percent_max >= 2 /*stats.perc0005_annot_percent_max*/) - ) t - GROUP BY gene_source_id, ontology_term, intron_count - ) t - WHERE string_value = 'All' - - UNION - - SELECT gene_source_id - , ontology_term - , case when count(*) = intron_count THEN 'All-high' - when count(*) = 0 THEN 'None' - else 'Any-high' end as string_value - FROM ( - SELECT gij.gene_source_id - , 'intron_junction' as ontology_term - , intronCount.intron_count - FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats - , (SELECT count (*) as intron_count, source_id FROM apidbtuning.genemodeldump WHERE type = 'Intron' GROUP BY source_id) intronCount - WHERE gij.gene_source_id = intronCount.source_id - AND gij.na_sequence_id = stats.na_sequence_id - AND gij.annotated_intron = 'Yes' - AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2 - AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END - AND (gij.contained = 0 or gij.percent_max >= 2) - ) t2 - GROUP BY gene_source_id, ontology_term, intron_count - - UNION - - SELECT gene_source_id - , ontology_term - , CASE WHEN count(*) = intron_count THEN 'All-low' - WHEN count(*) = 0 THEN 'None' - ELSE 'Any-low' END as string_value - FROM ( - SELECT gij.gene_source_id - , 'intron_junction' as ontology_term - , intronCount.intron_count - FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats - , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount - WHERE gij.gene_source_id = intronCount.source_id - AND gij.na_sequence_id = stats.na_sequence_id 
- AND gij.annotated_intron = 'Yes' - AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4 - AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END - AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max) - AND gij.intron_feature_id not in ( - SELECT gij.intron_feature_id - FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats - WHERE gij.na_sequence_id = stats.na_sequence_id - AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 2 - AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END - AND (gij.contained = 0 or gij.percent_max >= 2) - ) - ) t3 - GROUP BY gene_source_id, ontology_term, intron_count - - UNION - - SELECT gene_source_id, ontology_term, replace(string_value, 'All' , 'Any-low') as string_value - FROM ( - SELECT gene_source_id - , ontology_term - , case when count(*) = intron_count THEN 'All' - when count(*) = 0 THEN 'None' - else 'Any' end as string_value - FROM ( - SELECT gij.gene_source_id - , 'intron_junction' as ontology_term - , intronCount.intron_count - FROM apidbtuning.geneintronjunction gij, ApidbTuning.GeneIntJuncStats stats - , (select count (*) as intron_count, source_id from apidbtuning.genemodeldump where type = 'Intron' group by source_id) intronCount - WHERE gij.gene_source_id = intronCount.source_id - AND gij.na_sequence_id = stats.na_sequence_id - AND gij.annotated_intron = 'Yes' - AND gij.segment_end - gij.segment_start <= stats.max_intron_length * 4 - AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.min_annot_score ELSE 5*stats.min_annot_score END - AND (gij.contained = 0 or gij.percent_max >= stats.min_annot_percent_max) - AND gij.intron_feature_id not in ( - SELECT gij.intron_feature_id - FROM ApidbTuning.GeneIntronJunction gij, ApidbTuning.GeneIntJuncStats stats - WHERE gij.na_sequence_id = stats.na_sequence_id - AND gij.segment_end - 
gij.segment_start <= stats.max_intron_length * 2 - AND gij.total_unique >= CASE WHEN contained = 1 THEN stats.perc01_annot_score ELSE 5*stats.perc01_annot_score END - AND (gij.contained = 0 or gij.percent_max >= 2) - ) - ) t - GROUP BY gene_source_id, ontology_term, intron_count - ) t4 - WHERE string_value = 'All' - ) t - - +with annotatedJunctions AS ( + select gene_source_id + , count(*) as annotated_count + from ( + select distinct ta.gene_source_id + , il.na_sequence_id + , il.start_min + , il.end_max + , il.is_reversed + from apidb.intronlocation il + inner join :SCHEMA.transcriptattributes ta + ON il.parent_id = ta.na_feature_id + ) introns group by gene_source_id +), exptJunctions as ( + select gij.gene_source_id + , case when gij.total_unique >= stats.perc01_annot_score and gij.percent_max >= 2 + then 'High' + else 'Low' + end as conf + FROM :SCHEMA.GeneIntronJunction gij + JOIN :SCHEMA.GenomicSeqJunctionStats stats + ON gij.na_sequence_id = stats.na_sequence_id + where gij.annotated_intron = 'Yes' -- we only care about the annotated ones + and gij.org_abbrev = ':ORG_ABBREV' + and stats.org_abbrev = ':ORG_ABBREV' +), exptJunctionCounts as ( + select gene_source_id + , conf + , count(*) as intron_count + from exptJunctions + group by gene_source_id + , conf +) +select aj.gene_source_id + , CASE WHEN aj.annotated_count = ejc.intron_count + THEN 'All-high' + ELSE 'Any-high' + END as string_value, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date + from annotatedJunctions aj + left join exptJunctionCounts ejc on aj.gene_source_id = ejc.gene_source_id + where ejc.conf = 'High' + AND ejc.intron_count > 0 +UNION ALL select aj.gene_source_id + , CASE WHEN aj.annotated_count = sum(ejc.intron_count) + THEN 'All-low' + ELSE 'Any-low' + END as string_value, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date + from annotatedJunctions aj + left join exptJunctionCounts ejc on aj.gene_source_id = ejc.gene_source_id +group by aj.gene_source_id, aj.annotated_count +having sum(ejc.intron_count) > 0 :DECLARE_PARTITION; - diff --git a/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql 
b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql new file mode 100644 index 0000000000..35b6c6318f --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation.psql @@ -0,0 +1,23 @@ +/* + The tuning table this was based on used a loop over the nasequences. We can add that back if slow but I am: + 1. using geneattributes which will make this faster than joining genefeature to nalocation and grouping + 2. using input tables here that are already partitioned +*/ +:CREATE_AND_POPULATE +SELECT ga.na_sequence_id + , ga.start_min + , ga.is_reversed + , ga.end_max + , ga.na_feature_id + , ga.source_id, + round(sum(nafe.value)::NUMERIC,2) as total_expression, ':PROJECT_ID' as project_id, ':ORG_ABBREV' as org_abbrev, current_timestamp as modification_date + FROM :SCHEMA.GeneAttributes ga + , :SCHEMA.JunctionToCoverageProfileMapping je + , results.nafeatureexpression nafe + WHERE ga.na_feature_id = nafe.na_feature_id + AND nafe.protocol_app_node_id = je.exp_pan_id + AND ga.org_abbrev = ':ORG_ABBREV' + AND je.org_abbrev = ':ORG_ABBREV' + GROUP BY ga.na_sequence_id,ga.start_min,ga.is_reversed,ga.end_max,ga.na_feature_id,ga.source_id +:DECLARE_PARTITION + ; diff --git a/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql new file mode 100644 index 0000000000..a2d30c4cba --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/JunctionGeneLocation_id.psql @@ -0,0 +1,5 @@ +create index gnidloc_nafid_ix on :SCHEMA.JunctionGeneLocation (na_feature_id) + ; + +CREATE INDEX gnattidloc_pk_ix ON :SCHEMA.JunctionGeneLocation (na_sequence_id,start_min,is_reversed,end_max,na_feature_id,source_id,total_expression) + ; diff --git a/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql new file mode 100644 index 0000000000..462937437d --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping.psql @@ -0,0 +1,143 @@ +drop table if exists 
:SCHEMA.:ORG_ABBREVJunctionExpressionTmp; + +drop table if exists :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp; + + +/* + Join junctions to Expression values and get multiplier and decide on strand switching. + The stats CTE is restricted to the organism being loaded via :TAXON_ID. +*/ +CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVJunctionExpressionTmp AS + WITH stats AS ( + SELECT protocol_app_node_id + --, 'total' as type + --, count(*) as total_junctions + --, sum(unique_reads) as total_reads + , round(1000000/sum(unique_reads),4) as multiplier + FROM apidb.IntronJunction ij, dots.nasequence s + WHERE unique_reads >= 1 + AND ij.na_sequence_id = s.na_sequence_id + AND s.taxon_id = :TAXON_ID + GROUP BY protocol_app_node_id + ), ij AS ( + SELECT pj.output_pan_id as junctions_pan_id + , p.output_pan_id as expression_pan_id + , avg(nafe.value) as avg_value,pan.name as exp_name + , regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name + FROM :SCHEMA.panio p + , :SCHEMA.panio pj + , results.nafeatureexpression nafe + , study.protocolappnode pan + WHERE pj.output_pan_id in (select distinct protocol_app_node_id from stats) + AND pj.input_pan_id = p.input_pan_id + AND p.output_pan_id = pan.protocol_app_node_id + AND pan.name like '%tpm - unique%' -- NOTE: probably better to use raw counts here?? 
+ AND p.output_pan_id = nafe.protocol_app_node_id + AND p.org_abbrev = ':ORG_ABBREV' + AND pj.org_abbrev = ':ORG_ABBREV' + GROUP BY pj.output_pan_id, p.output_pan_id, pan.name + ) , part AS ( + SELECT + ij.junctions_pan_id + , ij.avg_value + , stats.multiplier + , max(ij.expression_pan_id) OVER w as max_exp_pan_id + , max(ij.sample_name) OVER w as max_sample_Name + , max(ij.exp_name) OVER w as max_exp_name + FROM ij, stats + WHERE ij.junctions_pan_id = stats.protocol_app_node_id + WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) + ) + SELECT DISTINCT junctions_pan_id + , first_value(max_exp_pan_id) OVER w1 as exp_pan_id + , first_value(max_sample_name) OVER w1 as sample_name + , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands + , multiplier + FROM part + WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) + ; + +create index :ORG_ABBREVjunexpgijtmp_ix on :SCHEMA.:ORG_ABBREVJunctionExpressionTmp(junctions_pan_id,exp_pan_id) + ; + + +/* + Grab general mapping stats +*/ + +CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp ( + junctions_pan_id, + read_length, + mapped_reads, + avg_mapping_coverage, + num_replicates +) AS + SELECT junctions_pan_id + , round(avg(average_read_length - 2),1) as read_length + , round(avg(number_mapped_reads),1) as mapped_reads + , round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) as avg_mapping_coverage + , count(*) as num_replicates + FROM (SELECT je.junctions_pan_id + , ca.value::NUMERIC as average_read_length + , cb.value::NUMERIC as number_mapped_reads + , cc.value::NUMERIC as avg_mapping_coverage + FROM :SCHEMA.:ORG_ABBREVJunctionExpressionTmp je + , :SCHEMA.PANIO ioa + , STUDY.CHARACTERISTIC ca + , STUDY.CHARACTERISTIC cb + , sres.ontologyterm ota + , sres.ontologyterm otb, + STUDY.CHARACTERISTIC cc, sres.ontologyterm otc + WHERE je.junctions_pan_id = 
ioa.output_pan_id + AND ioa.input_pan_id = ca.protocol_app_node_id + AND ca.value is not null + AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID + AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length' + AND ca.protocol_app_node_id = cb.protocol_app_node_id + AND cb.value is not null + AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID + AND otb.source_id IN ('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads' + AND ca.protocol_app_node_id = cc.protocol_app_node_id + AND cc.value is not null + AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID + AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage' + AND ioa.org_abbrev = ':ORG_ABBREV' + ) t + GROUP by junctions_pan_id +; + + +CREATE INDEX :ORG_ABBREVmpstats_pk_ix on :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp + (junctions_pan_id,read_length,mapped_reads,avg_mapping_coverage,num_replicates) +; + + + +/* + This is the main table: one row per junction sample, joining the two temp tables built above +*/ +:CREATE_AND_POPULATE + SELECT DISTINCT je.sample_name + , je.junctions_pan_id + , je.exp_pan_id + , ms.read_length + , ms.mapped_reads + , ms.avg_mapping_coverage + , ms.num_replicates + , je.switch_strands + , je.multiplier + , ':PROJECT_ID' as project_id + , ':ORG_ABBREV' as org_abbrev + , current_timestamp as modification_date + + FROM :SCHEMA.:ORG_ABBREVJunctionExpressionTmp je + , :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp ms + WHERE je.junctions_pan_id = ms.junctions_pan_id +:DECLARE_PARTITION + ; + + +drop table :SCHEMA.:ORG_ABBREVJunctionExpressionTmp +; + +drop table :SCHEMA.:ORG_ABBREVJunctionMappingStatsTmp +; diff --git a/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql new file mode 100644 index 0000000000..c724f94d67 --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/JunctionToCoverageProfileMapping_ix.psql @@ -0,0 +1,2 @@ +create index JunctionCovMap_ix on :SCHEMA.JunctionToCoverageProfileMapping(junctions_pan_id,exp_pan_id) + ; diff --git 
a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql b/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql deleted file mode 100644 index e0d672aa7c..0000000000 --- a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene.psql +++ /dev/null @@ -1,13 +0,0 @@ -:CREATE_AND_POPULATE - - SELECT DISTINCT pn.pathway_node_id - , tp.gene_source_id, tp.project_id, tp.org_abbrev, current_timestamp as modification_date - FROM :SCHEMA.transcriptpathway tp - , sres.pathwaynode pn - WHERE tp.pathway_id = pn.pathway_id - AND tp.ec_number_gene like replace(pn.display_label, '-', '%') - and tp.org_abbrev = ':ORG_ABBREV' - - -:DECLARE_PARTITION; - diff --git a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql index 21041cb21c..fbac42a0da 100644 --- a/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql +++ b/Model/lib/psql/webready/orgSpecific/PathwaysGeneTable.psql @@ -1,44 +1,28 @@ -:CREATE_AND_POPULATE - SELECT t2.*, current_timestamp as modification_date FROM ( - SELECT DISTINCT gene_source_id - , project_id - , org_abbrev - , pathway_source_id - , pathway_name - , count(reaction_source_id) as reactions - , enzyme - , expasy_url - , pathway_source - , exact_match - FROM ( - SELECT DISTINCT tp.gene_source_id - , tp.project_id - , tp.pathway_source_id - , tp.pathway_name - , tp.org_abbrev - , pr.reaction_source_id - , pr.enzyme - , pr.expasy_url - , tp.pathway_source - , CASE max(tp.exact_match) WHEN 1 THEN 'Yes' WHEN 0 THEN 'No' END AS exact_match - FROM :SCHEMA.TranscriptPathway tp - , :SCHEMA.PathwayAttributes pa - , :SCHEMA.PathwayCompounds pc - , :SCHEMA.PathwayReactions pr - WHERE tp.pathway_id = pa.pathway_id - AND pc.pathway_id = pa.pathway_id - AND pr.reaction_id = pc.reaction_id - AND pr.ext_db_name = pc.ext_db_name - AND tp.ec_number_pathway = pr.enzyme - AND tp.wildcard_count_gene <= tp.wildcard_count_pathway - AND pr.enzyme != '-.-.-.-' - AND tp.org_abbrev = ':ORG_ABBREV' - GROUP BY 
tp.gene_source_id, tp.project_id, tp.org_abbrev, tp.pathway_name, tp.pathway_source_id, pr.reaction_source_id, pr.enzyme, pr.expasy_url, tp.pathway_source - ) t - GROUP BY gene_source_id, project_id, org_abbrev, pathway_source_id, pathway_name, enzyme, expasy_url, pathway_source, exact_match - ) t2 - ORDER BY pathway_source, lower(pathway_name) - +DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp; + + +/* ATTENTION: This table is empty. We will populate it in the comparative graph as loading depends on TranscriptPathway. +We are creating it here so that it is partitioned */ + +CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp ( + gene_source_id VARCHAR(80), + pathway_source_id VARCHAR(50), + pathway_name VARCHAR(150), + reactions NUMERIC(8), + enzyme VARCHAR(20), + expasy_url TEXT, + pathway_source TEXT, + exact_match VARCHAR(8), + + --for partitioning + project_id VARCHAR(20), + org_abbrev VARCHAR(20), + modification_date timestamp +); + +:CREATE_AND_POPULATE +SELECT pgt.* FROM :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp pgt :DECLARE_PARTITION; +DROP TABLE :SCHEMA.:ORG_ABBREVPathwaysGeneTableTmp; diff --git a/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql b/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql new file mode 100644 index 0000000000..4c50716bf7 --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript.psql @@ -0,0 +1,33 @@ +drop table if exists :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp; + +/* +ATTENTION: This script is used to make an empty table. It will be populated with a perl script. 
+ +We make the table here in the usual way so we get partitioned webready table +*/ + + +create unlogged table :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp ( + location numeric(10), + type varchar(20), + na_sequence_id numeric(10), + is_unique numeric(1), + sum_cpm float8, + dist_to_first_atg numeric(10), + gene_source_id varchar(80), + transcript_source_id varchar(80), + dist_to_cds numeric(10), + is_dominant numeric(1), + strand char(1), + project_id varchar(20), + org_abbrev varchar(20), + modification_date timestamp + ); + + +:CREATE_AND_POPULATE +select * from :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp +:DECLARE_PARTITION; + +drop table :SCHEMA.:ORG_ABBREVSpliceSiteTranscriptTmp +; diff --git a/Model/lib/psql/webready/orgSpecific/PathwayNodeGene_ix.psql b/Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript_ix.psql similarity index 100% rename from Model/lib/psql/webready/orgSpecific/PathwayNodeGene_ix.psql rename to Model/lib/psql/webready/orgSpecific/SpliceSiteTranscript_ix.psql diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql index 1e47c1624b..c6ca34e893 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptAttributes.psql @@ -37,6 +37,7 @@ COALESCE(preferred_name.name, any_name.name) AS gene_name, cast(coalesce(preferred_gene_product.product, any_gene_product.product, gf.product) as VARCHAR(300)) as old_gene_product, + COALESCE(gp.product, 'unspecified product') as gene_product, REPLACE(so.name, '_', ' ') AS gene_type, gf.name as gene_ebi_biotype, gi.gene_id, @@ -79,6 +80,7 @@ INNER JOIN apidb.FeatureLocation nl ON gf.na_feature_id = nl.na_feature_id INNER JOIN sres.OntologyTerm so ON gf.sequence_ontology_id = so.ontology_term_id INNER JOIN :SCHEMA.GeneLocations gloc ON gf.source_id = gloc.source_id and gloc.org_abbrev = ':ORG_ABBREV' + LEFT JOIN :SCHEMA.GeneProduct gp ON gf.source_id = 
gp.source_id and gp.org_abbrev = ':ORG_ABBREV' INNER JOIN sres.ExternalDatabaseRelease edr ON gf.external_database_release_id = edr.external_database_release_id INNER JOIN sres.ExternalDatabase ed ON edr.external_database_id = ed.external_database_id INNER JOIN :SCHEMA.GenomicSeqAttributes gsa ON nl.na_sequence_id = gsa.na_sequence_id and gsa.org_abbrev = ':ORG_ABBREV' @@ -204,7 +206,7 @@ ) olds ON gf.na_feature_id = olds.na_feature_id WHERE nl.is_top_level = 1 AND nl.feature_type = 'GeneFeature' - AND (gsa.taxon_id::varchar = ':TAXON_ID' OR length(':TAXON_ID') = 0) + AND gsa.taxon_id = :TAXON_ID AND species_name.name_class = 'scientific name' AND (gf.is_predicted != 1 OR gf.is_predicted is null) AND tn.name_class = 'scientific name' diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptEC.psql b/Model/lib/psql/webready/orgSpecific/TranscriptEC.psql new file mode 100644 index 0000000000..e979f94986 --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/TranscriptEC.psql @@ -0,0 +1,35 @@ + +DROP TABLE if exists :SCHEMA.:ORG_ABBREVTranscriptECTmp; +/* + +ATTENTION: This table is empty. We will populate it in the comparative graph so that we can include ortho-derived EC numbers. +We are creating them it so that they it is partitioned. 
+*/ + +/* This table maps EC numbers to transcripts */ + +CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVTranscriptECTmp ( + source_id VARCHAR(80), + gene_source_id VARCHAR(80), + enzyme_class_id NUMERIC(12), + ec_number VARCHAR(16), + ec_number_1 NUMERIC(3), + ec_number_2 NUMERIC(3), + ec_number_3 NUMERIC(3), + ec_number_4 NUMERIC(3), + wildcard_count NUMERIC(3), + evidence_code VARCHAR(255), + + -- fields required for partitioning + project_id VARCHAR(20), + org_abbrev VARCHAR(20), + modification_date timestamp + +); + + +:CREATE_AND_POPULATE +select tet.* from :SCHEMA.:ORG_ABBREVTranscriptECTmp tet +:DECLARE_PARTITION; + +DROP TABLE :SCHEMA.:ORG_ABBREVTranscriptECTmp; diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql new file mode 100644 index 0000000000..5b9767528f --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/TranscriptEC_ix.psql @@ -0,0 +1,5 @@ +CREATE INDEX TranscriptEc_1_ix on :SCHEMA.TranscriptEC (source_id, ec_number) + ; + +CREATE INDEX TranscriptEc_2_ix on :SCHEMA.TranscriptEC (ec_number, source_id) + ; diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql new file mode 100644 index 0000000000..09facffb8c --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup.psql @@ -0,0 +1,23 @@ + +DROP TABLE if exists :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp; +/* + +ATTENTION: This table is empty. We will populate it in the comparative graph +We are creating them it so that they it is partitioned. 
+*/ + +CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp ( + source_id VARCHAR(80), + gene_id VARCHAR(80), + group_id VARCHAR(16), + project_id varchar(20), + org_abbrev varchar(20), + modification_date timestamp +); + +:CREATE_AND_POPULATE +SELECT ogt.* from :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp ogt +:DECLARE_PARTITION; + + +DROP TABLE :SCHEMA.:ORG_ABBREVTranscriptOrthologGroupTmp; diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql new file mode 100644 index 0000000000..ac9b3670e0 --- /dev/null +++ b/Model/lib/psql/webready/orgSpecific/TranscriptOrthologGroup_ix.psql @@ -0,0 +1,2 @@ +alter table :SCHEMA.TranscriptOrthologGroup + add constraint TranscriptOrthologGroup_pk primary key (source_id); diff --git a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql index 3280209fa6..f63449f735 100644 --- a/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql +++ b/Model/lib/psql/webready/orgSpecific/TranscriptPathway.psql @@ -1,79 +1,40 @@ -:CREATE_AND_POPULATE - WITH transcript_ec AS ( - SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4, - regexp_count( ec.ec_number, '-') as wildcard_count - FROM sres.EnzymeClass ec - WHERE enzyme_class_id IN (SELECT aseqEc.enzyme_class_id - FROM dots.AaSequenceEnzymeClass aseqEc, dots.aasequence seq - WHERE aseqEc.aa_sequence_id = seq.aa_sequence_id - AND seq.taxon_id = :TAXON_ID) - GROUP BY ec.enzyme_class_id - ), - pathway_node_ec AS ( - SELECT distinct pn.pathway_id, pn.row_id as enzyme_class_id - FROM sres.PathwayNode pn, sres.ontologyterm ot - WHERE pn.pathway_node_type_id = ot.ontology_term_id - AND ot.name = 'enzyme' - AND pn.display_label != '-.-.-.-' - ), - pathway_ec AS ( - SELECT ec.enzyme_class_id, ec.ec_number, ec.ec_number_1, ec.ec_number_2, ec.ec_number_3, ec.ec_number_4, 
- regexp_count( ec.ec_number, '-') as wildcard_count - FROM sres.EnzymeClass ec - WHERE enzyme_class_id IN (SELECT enzyme_class_id FROM pathway_node_ec) - GROUP BY ec.enzyme_class_id - ), - ec_match AS ( - SELECT tec.enzyme_class_id as transcript_enzyme_class_id, - pec.enzyme_class_id as pathway_enzyme_class_id, - tec.wildcard_count as wildcard_count_transcript, - pec.wildcard_count as wildcard_count_pathway, - tec.ec_number as ec_number_transcript, - pec.ec_number as ec_number_pathway - FROM transcript_ec tec, pathway_ec pec - WHERE (tec.ec_number_1 = pec.ec_number_1 or tec.ec_number_1 is null or pec.ec_number_1 is null) - AND (tec.ec_number_2 = pec.ec_number_2 or tec.ec_number_2 is null or pec.ec_number_2 is null) - AND (tec.ec_number_3 = pec.ec_number_3 or tec.ec_number_3 is null or pec.ec_number_3 is null) - AND (tec.ec_number_4 = pec.ec_number_4 or tec.ec_number_4 is null or pec.ec_number_4 is null) - ) - SELECT DISTINCT - ':PROJECT_ID' as project_id - , ':ORG_ABBREV' as org_abbrev - , CURRENT_TIMESTAMP as modification_date - , ga.source_id - , ga.gene_source_id - , pa.source_id as pathway_source_id - , pa.name as pathway_name - , ec_match.ec_number_transcript as ec_number_gene - , ec_match.wildcard_count_transcript as wildcard_count_gene - , ec_match.ec_number_pathway - , ec_match.wildcard_count_pathway - , CASE WHEN ec_match.ec_number_pathway = ec_match.ec_number_transcript - THEN 1 - ELSE 0 END as exact_match - , CASE WHEN ec_match.wildcard_count_pathway + ec_match.wildcard_count_transcript = 0 - THEN 1 - ELSE 0 END as complete_ec - , pa.pathway_id - , pa.pathway_source - , p.external_database_release_id - FROM :SCHEMA.PathwayAttributes pa - , sres.pathway p - , pathway_node_ec pec - , ec_match - , dots.AaSequenceEnzymeClass asec - , :SCHEMA.TranscriptAttributes ga - WHERE ga.org_abbrev = ':ORG_ABBREV' - AND pa.pathway_id = pec.pathway_id - AND p.pathway_id = pa.pathway_id - AND pec.enzyme_class_id = ec_match.pathway_enzyme_class_id - AND 
asec.enzyme_class_id = ec_match.transcript_enzyme_class_id - AND ga.aa_sequence_id = asec.aa_sequence_id - -- TODO: Need new downstream table to bring in OrthoMCLDerived EC associations - -- AND ( - -- (ga.orthomcl_name IS NULL AND asec.evidence_code != 'OrthoMCLDerived') - -- OR ga.orthomcl_name IS NOT NULL - -- ) - ; +DROP TABLE if exists :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp; +/* + +ATTENTION: This table is empty. We will populate it in the comparative graph so that we can include ortho-derived EC numbers. +We are creating it here so that it is partitioned. +*/ + +/* This is the final TranscriptPathway table */ + +CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp ( + -- transcript information + source_id VARCHAR(80), + gene_source_id VARCHAR(80), + + --pathway information + pathway_source_id VARCHAR(50), + pathway_name VARCHAR(150), + pathway_id NUMERIC(12), + pathway_source TEXT, -- TODO should this be a varchar? + external_database_release_id NUMERIC(10), + --match information + ec_number_gene VARCHAR(16), + wildcard_count_gene NUMERIC(3), + ec_number_pathway VARCHAR(16), + wildcard_count_pathway NUMERIC(3), + exact_match NUMERIC(3), + complete_ec NUMERIC(3), + + -- fields required for partitioning + project_id varchar(20), + org_abbrev varchar(20), + modification_date timestamp +); + +:CREATE_AND_POPULATE +SELECT tpt.* from :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp tpt :DECLARE_PARTITION; + +DROP TABLE :SCHEMA.:ORG_ABBREVTranscriptPathwayTmp; diff --git a/Model/lib/psql/webready/unknown/NameMappingGIJ.psql b/Model/lib/psql/webready/unknown/NameMappingGIJ.psql deleted file mode 100644 index d21a1b1cf4..0000000000 --- a/Model/lib/psql/webready/unknown/NameMappingGIJ.psql +++ /dev/null @@ -1,118 +0,0 @@ - DROP TABLE IF EXISTS :SCHEMA.:ORG_ABBREVJunExpGIJtmp; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVJunExpGIJtmp AS - WITH ij AS ( - SELECT pj.output_pan_id as junctions_pan_id, p.output_pan_id as expression_pan_id, avg(nafe.value) as 
avg_value,pan.name as exp_name, - regexp_replace(pan.name, ' \[htseq-union.*', '') as sample_name - FROM :SCHEMA.panio p, :SCHEMA.panio pj, results.nafeatureexpression nafe, study.protocolappnode pan - WHERE pj.output_pan_id in (select distinct protocol_app_node_id from apidb.intronjunction) - AND pj.input_pan_id = p.input_pan_id - AND p.output_pan_id = pan.protocol_app_node_id - AND pan.name like '%tpm - unique%' - AND p.output_pan_id = nafe.protocol_app_node_id - and p.org_abbrev = ':ORG_ABBREV' - and pj.org_abbrev = ':ORG_ABBREV' - GROUP BY pj.output_pan_id, p.output_pan_id, pan.name - ORDER BY pj.output_pan_id - ) , stats AS ( - SELECT protocol_app_node_id, 'total' as type, count(*) as total_junctions, - sum(unique_reads) as total_reads, round(1000000/sum(unique_reads),4) as multiplier - FROM apidb.IntronJunction - WHERE unique_reads >= 1 - GROUP BY protocol_app_node_id - ), part AS ( - SELECT - ij.junctions_pan_id, ij.avg_value, stats.multiplier - , max(ij.expression_pan_id) OVER w as max_exp_pan_id - , max(ij.sample_name) OVER w as max_sample_Name - , max(ij.exp_name) OVER w as max_exp_name - FROM ij, stats - WHERE ij.junctions_pan_id = stats.protocol_app_node_id - WINDOW w AS (partition by ij.junctions_pan_id, stats.multiplier, ij.avg_value) - ) - SELECT DISTINCT * FROM ( - SELECT junctions_pan_id - , first_value(max_exp_pan_id) OVER w1 as exp_pan_id - , first_value(max_sample_name) OVER w1 as sample_name - , CASE WHEN first_value(max_exp_name) OVER w1 LIKE '%secondstrand%' THEN 'true' ELSE 'false' END as switch_strands - , multiplier - FROM part - WINDOW w1 AS (PARTITION BY junctions_pan_id, multiplier ORDER BY avg_value DESC) - ) t - ORDER BY junctions_pan_id - - ; - - - - create index junexpgijtmp_ix on :SCHEMA.:ORG_ABBREVJunExpGIJtmp(junctions_pan_id,exp_pan_id) - - ; - - drop table if exists :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp; - - CREATE UNLOGGED TABLE :SCHEMA.:ORG_ABBREVMappingStatsGIJtmp ( - junctions_pan_id, - read_length, - mapped_reads, - 
avg_mapping_coverage, - num_replicates - ) AS - SELECT junctions_pan_id, round(avg(average_read_length - 2),1) as read_length, - round(avg(number_mapped_reads),1) as mapped_reads, - round(avg(avg_mapping_coverage) * ((avg(average_read_length) - 2) / avg(average_read_length)),2) - as avg_mapping_coverage, - count(*) as num_replicates - FROM (SELECT je.junctions_pan_id, ca.value::NUMERIC as average_read_length, - cb.value::NUMERIC as number_mapped_reads, - cc.value::NUMERIC as avg_mapping_coverage - FROM :SCHEMA.:ORG_ABBREVjunexpgijtmp je, STUDY.CHARACTERISTIC ca, sres.ontologyterm ota, - :SCHEMA.PANIO ioa, STUDY.CHARACTERISTIC cb, sres.ontologyterm otb, - STUDY.CHARACTERISTIC cc, sres.ontologyterm otc - WHERE je.junctions_pan_id = ioa.output_pan_id - AND ioa.input_pan_id = ca.protocol_app_node_id - AND ca.value is not null - and ioa.org_abbrev = ':ORG_ABBREV' - AND ca.QUALIFIER_ID = ota.ONTOLOGY_TERM_ID - AND ota.source_id IN ('EuPathUserDefined_00504','EUPATH_0000457') -- '%average read length' - AND ca.protocol_app_node_id = cb.protocol_app_node_id - AND cb.value is not null - AND cb.QUALIFIER_ID = otb.ONTOLOGY_TERM_ID - AND otb.source_id IN ('EuPathUserDefined_00503','EUPATH_0000456') -- '%number mapped reads' - AND ca.protocol_app_node_id = cc.protocol_app_node_id - AND cc.value is not null - AND cc.QUALIFIER_ID = otc.ONTOLOGY_TERM_ID - AND otc.source_id IN ('EuPathUserDefined_00501','GENEPIO_0000092') -- '%average mapping coverage' - ) t - GROUP by Junctions_Pan_Id - - ; - - -:CREATE_AND_POPULATE - SELECT DISTINCT edp.dataset_presenter_display_name as exp_name, - edp.external_database_name, je.sample_name, - je.junctions_pan_id, je.exp_pan_id, - substr(dp.value, 1, 4000) as presenter_switch_strands, - substr(sj.value, 1, 4000) as show_intron_junctions, - substr(uj.value, 1, 4000) as include_unified_junctions, - ms.read_length, ms.mapped_reads, ms.avg_mapping_coverage, ms.num_replicates, - je.switch_strands, je.multiplier - FROM :SCHEMA.:ORG_ABBREVjunexpgijtmp 
je, study.nodeNodeSet sl, study.NodeSet s, ExternalDbDatasetPresenter edp, - DatasetProperty dp, DatasetProperty sj, DatasetProperty uj, mappingstatsgijtmp ms - WHERE sl.protocol_app_node_id = je.junctions_pan_id - AND je.junctions_pan_id = ms.junctions_pan_id - AND s.node_set_id = sl.node_set_id - AND s.node_type like 'junctions' - AND s.external_database_release_id = edp.external_database_release_id - AND dp.dataset_presenter_id = edp.dataset_presenter_id - AND dp.property = 'switchStrandsProfiles' - AND sj.dataset_presenter_id = edp.dataset_presenter_id - AND sj.property = 'showIntronJunctions' - AND uj.dataset_presenter_id = edp.dataset_presenter_id - AND uj.property = 'includeInUnifiedJunctions' - AND (substr(sj.value, 1, 10) = 'true' or substr(uj.value, 1, 10) = 'true') - - -:DECLARE_PARTITION; - diff --git a/Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql b/Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql deleted file mode 100644 index c386e6ba00..0000000000 --- a/Model/lib/psql/webready/unknown/NameMappingGIJ_ix.psql +++ /dev/null @@ -1,3 +0,0 @@ - create index namemappinggij_ix on :SCHEMA.NameMappingGIJ (junctions_pan_id,exp_pan_id) - ; -