diff --git a/Load/lib/perl/InstallEdaStudyFromArtifacts.pm b/Load/lib/perl/InstallEdaStudyFromArtifacts.pm index 02e264b32..e089bfd99 100644 --- a/Load/lib/perl/InstallEdaStudyFromArtifacts.pm +++ b/Load/lib/perl/InstallEdaStudyFromArtifacts.pm @@ -9,6 +9,8 @@ use JSON qw( decode_json ); use File::Copy; use ApiCommonData::Load::Psql; +use Data::Dumper; + my $SQLLDR_STREAM_SIZE = 512000; my $SQLLDR_ROWS = 5000; my $SQLLDR_BINDSIZE = 2048000; @@ -83,7 +85,7 @@ sub isDryRun {defined $_[0]->{DRYRUN} ? 1 : 0 } # these 2 are needed for installing and uninstalling internal datasets # because we are running batches in nextflow, we do not keep the original install.json sub skipPreexistingTables { defined $_[0]->{SKIP_PREEXISTING_TABLES} ? 1 : 0 } -sub getExternalDatabaseName { $_[0]->{EXTERNAL_DATABASE_NAME} } +sub getExternalDatabaseRlsSpecs { $_[0]->{EXTERNAL_DATABASE_RLS_SPECS} || [] } sub getConfigsArrayFromInstallJsonFile { @@ -114,42 +116,49 @@ sub getInstallJsonFile { sub uninstallDataFromExternalDatabase { my ($self) = @_; - - my $dbh = $self->getDbh(); my $schema = $self->getDbSchema(); - my $externalDatabaseName = $self->getExternalDatabaseName(); + my $externalDatabaseRlsSpecs = $self->getExternalDatabaseRlsSpecs(); - print STDERR "Uninstall Study with extDbName=$externalDatabaseName\n"; + my @extDbRlsIds; my $externalDatabaseReleaseIdQuery = "select r.external_database_release_id from sres.externaldatabase d join sres.externaldatabaserelease r on d.external_database_id = r.external_database_id -where d.name = ?"; +where d.name = ? + and r.version like ?"; my $sh_db = $dbh->prepare($externalDatabaseReleaseIdQuery); - $sh_db->execute($externalDatabaseName); - my %extDbRlsIds; - while(my ($extDbRlsId) = $sh_db->fetchrow_array()) { - $extDbRlsIds{$extDbRlsId} = 1; - } - $sh_db->finish(); - my @uniqueExtDbRlsIds = keys(%extDbRlsIds); - unless(scalar(@uniqueExtDbRlsIds) == 1) { - die "Must be one externaldatabaserelease for $externalDatabaseName"; + foreach my $spec (@$externalDatabaseRlsSpecs) { + my ($extDbName, $extDbVer) = split(/\|/, $spec); + + print STDERR "EXTDBNAME=$extDbName\n"; + print STDERR "EXTDBVER=$extDbVer\n"; + + $sh_db->execute($extDbName, $extDbVer); + + while(my ($extDbRlsId) = $sh_db->fetchrow_array()) { + push @extDbRlsIds, $extDbRlsId; + } + $sh_db->finish(); } - my $externalDatabaseReleaseId = $uniqueExtDbRlsIds[0]; - my $studyIdQuery = "select s.study_id from eda.study s where s.external_database_release_id = ?"; + my $placeholders = join(",", map { "?" } @extDbRlsIds); + my $studyIdQuery = "select s.study_id +from eda.study s inner join eda.studyexternaldatabaserelease ser + on s.study_id = ser.study_id +where ser.external_database_release_id in ($placeholders)"; my $sh_study = $dbh->prepare($studyIdQuery); - $sh_study->execute($externalDatabaseReleaseId); + $sh_study->execute(@extDbRlsIds); my %studyIds; while(my ($studyId) = $sh_study->fetchrow_array()) { + print STDERR "STUDY_ID=$studyId\n"; + $studyIds{$studyId} = 1; } $sh_study->finish(); @@ -157,19 +166,19 @@ where d.name = ?"; my @uniqueStudyIds = keys(%studyIds); if(scalar(@uniqueStudyIds) == 0) { - print STDERR "No studyId found for externaldatabasename $externalDatabaseName... nothing to do\n"; + print STDERR "No studyId found for externaldatabasename ... nothing to do\n"; return; } unless(scalar(@uniqueStudyIds) == 1) { - die "Must be one study id for externaldatabasereleaseid=$externalDatabaseReleaseId"; + die "Must be one study id for externaldatabasereleaseid"; } my $studyId = $uniqueStudyIds[0]; my $viewsQuery = $self->getQueryForTableOrViewNames("views", $schema); my $sh_view = $dbh->prepare($viewsQuery); - $sh_view->execute($externalDatabaseReleaseId); + $sh_view->execute($studyId); while(my ($viewName) = $sh_view->fetchrow_array()) { $self->dropTableOrView('view', "${schema}.${viewName}"); } @@ -177,7 +186,7 @@ where d.name = ?"; my $tablesQuery = $self->getQueryForTableOrViewNames("tables", $schema); my $sh_table = $dbh->prepare($tablesQuery); - $sh_table->execute($externalDatabaseReleaseId); + $sh_table->execute($studyId); my $tableCount; while(my ($tableName) = $sh_table->fetchrow_array()) { @@ -186,16 +195,19 @@ where d.name = ?"; } $sh_table->finish(); - die "No artifact tables found for externaldatabase=$externalDatabaseName" unless($tableCount); + print STDERR "No artifact tables found. " unless($tableCount); my $delEntityTypeGraph = " delete from $schema.entitytypegraph where study_id = $studyId"; print STDERR "RUNNING SQL: $delEntityTypeGraph\n\n"; $dbh->do($delEntityTypeGraph) || die "Failed running sql: $delEntityTypeGraph\n" unless $self->isDryRun(); + my $delStudyExternalDatabaseRelease = " delete from $schema.studyexternaldatabaserelease where study_id = $studyId"; + print STDERR "RUNNING SQL: $delStudyExternalDatabaseRelease\n\n"; + $dbh->do($delStudyExternalDatabaseRelease) || die "Failed running sql: $delEntityTypeGraph\n" unless $self->isDryRun(); + my $delStudy = "delete from $schema.study where study_id = $studyId"; print STDERR "RUNNING SQL: $delStudy\n\n"; $dbh->do($delStudy) || die "Failed running sql: $delStudy\n" unless $self->isDryRun(); - } sub getQueryForTableOrViewNames { @@ -207,7 +219,7 @@ join ${schema}.entitytypegraph etg on s.study_id = etg.study_id join information_schema.${tablesOrViews} t on lower(t.table_name) like '%' || lower(s.internal_abbrev) || '_' || lower(etg.internal_abbrev) -where s.external_database_release_id = ? +where s.study_id = ? and lower(t.table_schema) = lower('$schema') "; } diff --git a/Load/plugin/perl/InsertEdaStudyFromArtifacts.pm b/Load/plugin/perl/InsertEdaStudyFromArtifacts.pm index 8fc05bf74..2c0eab775 100644 --- a/Load/plugin/perl/InsertEdaStudyFromArtifacts.pm +++ b/Load/plugin/perl/InsertEdaStudyFromArtifacts.pm @@ -5,8 +5,7 @@ use strict; use GUS::Model::EDA::Study; use GUS::Model::EDA::EntityTypeGraph; - -use ApiCommonData::Load::InstallEdaStudyFromArtifacts; +use GUS::Model::EDA::StudyExternalDatabaseRelease; use Data::Dumper; @@ -49,7 +48,7 @@ my $argsDeclaration = descr => 'External Database Spec for this study', reqd => 1, constraintFunc => undef, - isList => 0, }), + isList => 1, }), ]; @@ -84,6 +83,7 @@ sub run { my ($self) = @_; my $extDbRlsSpec = $self->getArg('extDbRlsSpec'); + my $inputDir = $self->getArg("inputDirectory"); my $outputDir = $self->getArg("outputDirectory"); my $gusConfigFile = $self->getArg("gusConfigFile"); @@ -92,34 +92,44 @@ sub run { $self->userError("Output Directory already Exists"); } - my $extDbRlsId = $self->getExtDbRlsId($extDbRlsSpec); + #my $extDbRlsId = $self->getExtDbRlsId($extDbRlsSpec); my $installer = $self->makeInstaller($inputDir, $outputDir, $gusConfigFile, $extDbRlsSpec); my $installJsonFile = $installer->getInstallJsonFile($inputDir); my $configsArray = $installer->getConfigsArrayFromInstallJsonFile($installJsonFile); my ($studyConfig) = grep { $_->{type} eq 'table' && $_->{name} eq 'study' } @$configsArray; - my $studyHash = $self->preexistingTable($studyConfig, 'study.cache'); - - $studyHash->{'external_database_release_id'} = $extDbRlsId; + my $studyArray = $self->preexistingTable($studyConfig, 'study.cache'); + if(scalar @$studyArray != 1) { + $self->error("study.cache must contain one row of data"); + } - my $study = GUS::Model::EDA::Study->new($studyHash); + my $study = GUS::Model::EDA::Study->new($studyArray->[0]); my ($entityTypeGraphConfig) = grep { $_->{type} eq 'table' && $_->{name} eq 'entitytypegraph' } @$configsArray; + my $entityTypeGraphArray = $self->preexistingTable($entityTypeGraphConfig, 'entitytypegraph.cache'); + foreach my $entityTypeGraphHash (@$entityTypeGraphArray) { + my $entityTypeGraph = GUS::Model::EDA::EntityTypeGraph->new($entityTypeGraphHash); + $entityTypeGraph->setParent($study); + } - my $entityTypeGraphHash = $self->preexistingTable($entityTypeGraphConfig, 'entitytypegraph.cache'); - my $entityTypeGraph = GUS::Model::EDA::EntityTypeGraph->new($entityTypeGraphHash); + foreach my $spec (@{$extDbRlsSpec}) { + my $extDbRlsId = $self->getExtDbRlsId($spec); + my $studyExtDbRls = GUS::Model::EDA::StudyExternalDatabaseRelease->new({external_database_release_id => $extDbRlsId}); + $studyExtDbRls->setParent($study); + } - $entityTypeGraph->setParent($study); - $entityTypeGraph->submit(); + $study->submit(); if($self->getArg('commit')) { # now install the artifacts $installer->installData(); } - return("Loaded an EDA Study for external_database_release_id = $extDbRlsId"); + + + return("Loaded an EDA Study for "); } sub preexistingTable { @@ -129,30 +139,24 @@ sub preexistingTable { open(FILE, $cacheFileFullPath) or $self->error("Could not open file $cacheFileFullPath for reading: $!"); - my @data = (); - - my $count; + my $rv = []; while() { chomp; my @line = split(/\t/, $_); - @data = @line; - $count++; - } - - $self->userError("cache file $cacheFile must contain exactly one row") if($count != 1); - my $rv = {}; + my $row = {}; + foreach my $field (@{$config->{fields}}) { + next if($field->{macro}); - foreach my $field (@{$config->{fields}}) { - next if($field->{macro}); + my $key = $field->{name}; + my $index = $field->{cacheFileIndex}; + my $value = $line[$index]; - my $key = $field->{name}; - my $index = $field->{cacheFileIndex}; - my $value = $data[$index]; - $rv->{$key} = $value; + $row->{$key} = $value; + } + push @$rv, $row; } - close FILE; return $rv; } @@ -166,8 +170,6 @@ sub makeInstaller { die "gus.confg $gusConfigFile does not exist" unless -e $gusConfigFile; - my ($extDbName, $extDbVersion) = split(/\|/, $extDbRlsSpec); - my $gusconfig = GUS::Supported::GusConfig->new($gusConfigFile); my ($host, $port, $dbname); my $dsn = $gusconfig->getDbiDsn(); @@ -215,7 +217,7 @@ sub makeInstaller { 'DATA_FILES' => $outputDir, 'INPUT_DIR' => $inputDir, 'SKIP_PREEXISTING_TABLES' => 1, # we are loading these rows here not in the VDI artifact loader - 'EXTERNAL_DATABASE_NAME' => $extDbName, # This is needed for Undo only + 'EXTERNAL_DATABASE_RLS_SPECS' => $extDbRlsSpec, # This is needed for Undo only ); return ApiCommonData::Load::InstallEdaStudyFromArtifacts->new(\%requiredVars); @@ -229,10 +231,12 @@ sub undoTables { sub undoPreprocess { my($self, $dbh, $rowAlgInvocationList) = @_; - my $gusConfigFile = $self->getAlgorithmParam($dbh,$rowAlgInvocationList,'gusConfigFile'); + my $gusConfigFile = $self->getAlgorithmParam($dbh,$rowAlgInvocationList,'gusConfigFile')->[0]; + + my $extDbRlsSpec = $self->getAlgorithmParam($dbh,$rowAlgInvocationList,'extDbRlsSpec'); - unless(-e $gusConfigFile && $extDbRlsSpec) { + unless(-e $gusConfigFile && scalar @$extDbRlsSpec > 0) { $self->error("Required algorithm param missing OR does not exist gusConfigFile=$gusConfigFile, extDbRlsSpec=$extDbRlsSpec"); } @@ -263,11 +267,7 @@ sub getAlgorithmParam { my @values = keys %paramValues; - if(scalar keys %paramValues != 1) { - $self->error("Odd looking param values for $paramKey: \n" . Dumper(\%paramValues)); - } - - return $values[0]; + return \@values; }