diff --git a/.github/workflows/linkml_tasks.yaml b/.github/workflows/linkml_tasks.yaml index 258ffcd2..2caa2f24 100644 --- a/.github/workflows/linkml_tasks.yaml +++ b/.github/workflows/linkml_tasks.yaml @@ -36,17 +36,11 @@ jobs: make lint-no-warn continue-on-error: true - # - name: Validate sample data against the schema - # id: test_sample_data - # run: | - # make test-sample-data - # continue-on-error: true - - # - name: Validate sample data against JSONschema - # id: test_sample_data_jsonschema - # run: | - # make test-sample-data-jsonschema - # continue-on-error: true + - name: Run LinkML example files + id: run_linkml_examples + run: | + make test-examples + continue-on-error: true - name: Test documentation generation id: test_docgen @@ -61,19 +55,20 @@ jobs: # continue-on-error: true - name: outcome failure - if: steps.lint_linkml.outcome != 'success' || steps.validate_linkml.outcome != 'success' || steps.test_docgen.outcome != 'success' + if: steps.lint_linkml.outcome != 'success' || steps.validate_linkml.outcome != 'success' || steps.run_linkml_examples.outcome != 'success' || steps.test_docgen.outcome != 'success' # steps.test_sample_data.outcome != 'success' || steps.test_sample_data_jsonschema.outcome != 'success' run: | echo "linkml linting: ${{ steps.lint_linkml.outcome }}" echo "linkml schema validation: ${{ steps.validate_linkml.outcome }}" + echo "linkml example runs: ${{ steps.run_linkml_examples.outcome }}" echo "documentation generator: ${{ steps.test_docgen.outcome }}" exit 1 # echo "sample data validation: ${{ steps.test_sample_data.outcome }}" # echo "sample data JSON Schema validation: ${{ steps.test_sample_data_jsonschema.outcome }}" - name: outcome success - if: steps.lint_linkml.outcome == 'success' && steps.validate_linkml.outcome == 'success' && steps.test_docgen.outcome == 'success' + if: steps.lint_linkml.outcome == 'success' && steps.validate_linkml.outcome == 'success' && steps.run_linkml_examples.outcome == 'success' && 
steps.test_docgen.outcome == 'success' run: | echo All tests passed successfully! exit 0 diff --git a/Makefile b/Makefile index a6f3d0a3..5f8fc629 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ LINKML_DIR = $(SRC_DIR)/linkml JSONSCHEMA_DIR = $(SRC_DIR)/jsonschema PYTHON_DIR = $(SRC_DIR)/cdm_schema # sample data -SAMPLE_DATA_DIR = sample_data +SAMPLE_DATA_DIR = tests/data # unused SHEET_MODULE = $(LINKML_SCHEMA_GOOGLE_SHEET_MODULE) @@ -164,12 +164,6 @@ lint-validate: ## validate the schema; warnings or errors result in a non-zero lint-validate-no-warn: ## validate the schema; warnings do not result in a non-zero exit code $(RUN) linkml-lint --ignore-warnings --validate $(LINKML_DIR) -test-sample-data: ## validate sample data against LinkML schema - $(RUN) linkml-validate -s $(LINKML_SCHEMA_FILE) sample_data/**/**/*.json - -test-sample-data-jsonschema: ## validate sample data against JSONschema - $(RUN) check-jsonschema --schemafile $(JSONSCHEMA_DIR)/$(SCHEMA_BASE_NAME).schema.json --verbose sample_data/**/**/*.json - check-config: ifndef LINKML_SCHEMA_NAME $(error **Project not configured**:\n\n - See '.env.public'\n\n) @@ -187,18 +181,15 @@ examples/%.json: $(SAMPLE_DATA_DIR)/%.yaml examples/%.ttl: $(SAMPLE_DATA_DIR)/%.yaml $(RUN) linkml-convert -P EXAMPLE=http://example.org/ -s $(LINKML_SCHEMA_FILE) -C $(SCHEMA_ROOT) $< -o $@ -test-examples: examples/output -examples/output: src/$(SCHEMA_NAME)/schema/$(SCHEMA_NAME).yaml - mkdir -p $@ +test-examples: $(RUN) linkml-run-examples \ - --output-formats json \ + --input-formats yaml \ --output-formats yaml \ - --counter-example-input-directory $(SAMPLE_DATA_DIR)/invalid \ - --input-directory $(SAMPLE_DATA_DIR)/valid \ - --output-directory $@ \ - --schema $< > $@/README.md - + --counter-example-input-directory tests/data/invalid \ + --input-directory tests/data/valid \ + --output-directory examples/output \ + --schema $(LINKML_SCHEMA_FILE) > examples/output/README.md serve: mkd-serve ## Test documentation locally 
diff --git a/examples/output/README.md b/examples/output/README.md new file mode 100644 index 00000000..b308eebc --- /dev/null +++ b/examples/output/README.md @@ -0,0 +1,37 @@ +## Feature +### Input +```yaml +cds_phase: '0' +e_value: 1.2e-15 +end: 175000 +feature_id: CDM:f47ac10b-58cc-4372-a567-0e02b2c3d479 +hash: 5d41402abc4b2a76b9719d911017c592 +p_value: 0.001 +source_database: RefSeq +start: 125000 +strand: positive +type: SO:0001006 + +``` +## Feature-example2 +### Input +```yaml +end: 40484 +feature_id: SP:AA_WF_I-V_contig_20285 +hash: 7d793037a0760186574b0282f2f435e7 +p_value: 0.9953 +start: 1 +type: SO:0000155 + +``` +## Feature-example3 +### Input +```yaml +end: 11663 +feature_id: PV:AA_WF_I-V_contig_3062_provirus_3_11663 +hash: 3c6e0b8a9c15224a8228b9a98ca1531d +p_value: 0.9805 +start: 3 +type: SO:0001006 + +``` diff --git a/tests/data/plasmid-raw-data.txt b/tests/data/plasmid-raw-data.txt new file mode 100644 index 00000000..c70d94e3 --- /dev/null +++ b/tests/data/plasmid-raw-data.txt @@ -0,0 +1,44 @@ +seq_name length topology n_genes genetic_code plasmid_score fdr n_hallmarks marker_enrichment conjugation_genes amr_genes +AA_WF_I-V_contig_20285 6313 No terminal repeats 5 11 0.9956 NA 0 4.6079 NA NA +AA_WF_I-V_contig_165867 1322 No terminal repeats 2 11 0.9956 NA 2 3.2077 MOBQ NA +AA_WF_I-V_contig_163871 1333 No terminal repeats 2 11 0.9955 NA 2 3.0908 NA NA +AA_WF_I-V_contig_2765 40484 No terminal repeats 45 11 0.9953 NA 12 34.9604 T_virB2;T_virB11;MOBP1;T_virB5;T_virB10;T_virB9;T_virB8;virb4 NA +AA_WF_I-V_contig_22984 5691 No terminal repeats 4 11 0.9952 NA 1 4.889 NA NA +AA_WF_I-V_contig_20770 6179 No terminal repeats 5 11 0.9952 NA 0 5.9478 NA NA +AA_WF_I-V_contig_72789 2361 No terminal repeats 6 11 0.995 NA 3 5.6278 MOBQ NA +AA_WF_I-V_contig_32632 4304 No terminal repeats 6 11 0.995 NA 1 9.0599 MOBP1 NA +AA_WF_I-V_contig_9167 12808 No terminal repeats 11 11 0.9949 NA 0 8.693 NA NA +AA_WF_I-V_contig_21980 5903 No terminal repeats 8 11 0.9948 NA 1 12.73 
NA NA +AA_WF_I-V_contig_46174 3309 No terminal repeats 3 11 0.9947 NA 1 3.6934 NA NA +AA_WF_I-V_contig_62445 2645 No terminal repeats 3 11 0.9946 NA 0 4.3487 NA NA +AA_WF_I-V_contig_4297 26024 No terminal repeats 23 11 0.9946 NA 14 21.2045 F_traG;F_traH;F_traF;F_trbC;F_trbC;F_traU;F_traW;F_traV;F_traK;F_traE;F_traL NA +AA_WF_I-V_contig_114914 1704 No terminal repeats 2 11 0.9945 NA 2 2.1114 NA NA +AA_WF_I-V_contig_3457 32153 No terminal repeats 33 11 0.9945 NA 2 14.7417 NA NA +AA_WF_I-V_contig_351593 796 No terminal repeats 2 11 0.9944 NA 2 3.2261 MOBQ NA +AA_WF_I-V_contig_22126 5873 No terminal repeats 6 11 0.9943 NA 0 4.4871 NA NA +AA_WF_I-V_contig_15225 8075 No terminal repeats 10 11 0.9943 NA 0 6.9264 NA NA +AA_WF_I-V_contig_63052 2625 No terminal repeats 4 11 0.9942 NA 0 4.3181 NA NA +AA_WF_I-V_contig_23035 5681 No terminal repeats 11 11 0.9941 NA 0 0.855 NA NA +AA_WF_I-V_contig_38397 3800 No terminal repeats 5 11 0.994 NA 2 5.2557 MOBP1 NA +AA_WF_I-V_contig_95329 1946 No terminal repeats 3 11 0.994 NA 2 4.4089 virb4 NA +AA_WF_I-V_contig_182228 1240 No terminal repeats 2 11 0.994 NA 2 2.9387 MOBP1 NA +AA_WF_I-V_contig_12924 9375 No terminal repeats 14 11 0.9939 NA 3 11.7303 virb4;T_virB3;T_virB2 NA +AA_WF_I-V_contig_31591 4411 No terminal repeats 6 11 0.9938 NA 2 4.112 I_traI;I_traK NA +AA_WF_I-V_contig_18820 6724 No terminal repeats 7 11 0.9938 NA 1 5.4064 NA NA +AA_WF_I-V_contig_39353 3734 No terminal repeats 2 11 0.9938 NA 0 2.9763 NA NA +AA_WF_I-V_contig_3108 35777 No terminal repeats 35 11 0.9937 NA 15 25.5812 F_traG;F_traH;F_traF;F_traN;F_trbC;F_traU;F_traW;F_traV;F_traK;F_traE;F_traL NF012174 +AA_WF_I-V_contig_155267 1383 No terminal repeats 3 11 0.9936 NA 2 3.4366 I_traP;I_traQ NA +AA_WF_I-V_contig_57102 2821 No terminal repeats 3 11 0.9936 NA 0 4.6092 NA NA +AA_WF_I-V_contig_1487 73787 DTR 67 11 0.9936 NA 5 21.4529 MOBF NA +AA_WF_I-V_contig_517 180142 DTR 202 11 0.9936 NA 14 74.5763 T_virB2;T_virB3;virb4;T_virB6;T_virB8;T_virB9;T_virB10;MOBP1;MOBP1 
NF012178;NF033105 +AA_WF_I-V_contig_88773 2047 No terminal repeats 4 11 0.9935 NA 1 4.8714 T_virB1 NA + +# att/value pairs of use: topology +# att/value pairs of use:n_genes +# att/value pairs of use: genetic_code +# att/value pairs of use:plasmid_score +# att/value pairs of use:fdr +# attr/value pairs of use: n_hallmarks +# att/value pairs of use: marker_enrichment +# att/value pairs of use:conjugation_genes +# att/value pairs of use:amr_genes diff --git a/tests/data/prophage-raw-data.txt b/tests/data/prophage-raw-data.txt new file mode 100644 index 00000000..ecdaea33 --- /dev/null +++ b/tests/data/prophage-raw-data.txt @@ -0,0 +1,28 @@ +seq_name length topology coordinates n_genes genetic_code virus_score fdr n_hallmarks marker_enrichment taxonomy +AA_WF_I-V_contig_10560 11284 No terminal repeats NA 8 11 0.9837 NA 4 13.6837 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;Autographiviridae +AA_WF_I-V_contig_18546 6810 No terminal repeats NA 5 11 0.9831 NA 1 7.8586 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;; +AA_WF_I-V_contig_14483 8433 No terminal repeats NA 7 11 0.9822 NA 1 9.5757 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;; +AA_WF_I-V_contig_19370 6561 No terminal repeats NA 10 11 0.9821 NA 1 10.7279 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;; +AA_WF_I-V_contig_4559 24602 No terminal repeats NA 21 11 0.9819 NA 8 25.5093 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;Autographiviridae +AA_WF_I-V_contig_23972 5496 No terminal repeats NA 8 11 0.9819 NA 2 9.9477 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;; +AA_WF_I-V_contig_27267 4960 No terminal repeats NA 9 11 0.9812 NA 3 14.2375 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;; +AA_WF_I-V_contig_40677 3640 No terminal repeats NA 6 11 0.9811 NA 1 7.8946 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;; +AA_WF_I-V_contig_2760 40546 No terminal repeats NA 75 11 
0.9808 NA 5 52.012 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;; +AA_WF_I-V_contig_7019 16346 No terminal repeats NA 17 11 0.9808 NA 3 17.042 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;Autographiviridae +AA_WF_I-V_contig_16924 7381 No terminal repeats NA 5 11 0.9807 NA 1 8.3873 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;; +AA_WF_I-V_contig_24076 5475 No terminal repeats NA 11 11 0.9806 NA 1 14.4464 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;; +AA_WF_I-V_contig_3062|provirus_3_11663 11661 Provirus 3-11663 12 11 0.9805 NA 4 17.3246 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;; + + +# ContigCollection table has NCBITaxon as a slot, value is: NCBITaxon:10239, +# ContigCollection has loads of properties, the only required atm is id. recommended is cds_phase: +# RefSeq, etc. reusable id, but if not, can make up a hash. +# For this example, we need a ContigCollection_X_Feature object here to relate the ContigCollection to the Feature +# key/value pairs: 17.3246 geNomad score +# key/value pairs: Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes; +# Simon says most of the time it is an NCBITaxon because geNomad uses that for classification, but can get out of sync +# better to capture what we have. +# ContigCollection also has a required "type" slot: https://kbase.github.io/cdm-schema/ContigCollectionType/ +# KBase has a type here because they/we can make an arbitrary (or not) collection of contigs. This type field helps +# to group contig collections by how they were generated. (e.g. metagenome means the collection of contigs came out of a +# sequencing project that was metagenomic, etc.). 
diff --git a/tests/data/valid/Feature-example2.yaml b/tests/data/valid/Feature-example2.yaml new file mode 100644 index 00000000..fbec1ea0 --- /dev/null +++ b/tests/data/valid/Feature-example2.yaml @@ -0,0 +1,6 @@ +feature_id: SP:AA_WF_I-V_contig_20285 # SP = some plasmid +hash: "7d793037a0760186574b0282f2f435e7" +start: 1 +end: 40484 +type: "SO:0000155" +p_value: 0.9953 # plasmid_score; NOTE(review): 0.9953 and end 40484 are contig_2765's values in plasmid-raw-data.txt (contig_20285 is 6313 / 0.9956) - confirm feature_id diff --git a/tests/data/valid/Feature-example3.yaml b/tests/data/valid/Feature-example3.yaml new file mode 100644 index 00000000..84d9df6a --- /dev/null +++ b/tests/data/valid/Feature-example3.yaml @@ -0,0 +1,6 @@ +feature_id: PV:AA_WF_I-V_contig_3062_provirus_3_11663 # PV = provirus; a prophage is a kind of provirus. +hash: "3c6e0b8a9c15224a8228b9a98ca1531d" +start: 3 +end: 11663 +p_value: 0.9805 # virus_score? +type: "SO:0001006" # currently no provirus term in SO, so this uses the one for prophage. WRONG. diff --git a/tests/data/valid/Feature.yaml b/tests/data/valid/Feature.yaml new file mode 100644 index 00000000..9885276b --- /dev/null +++ b/tests/data/valid/Feature.yaml @@ -0,0 +1,11 @@ +feature_id: "CDM:f47ac10b-58cc-4372-a567-0e02b2c3d479" +hash: "5d41402abc4b2a76b9719d911017c592" +start: 125000 +end: 175000 +strand: positive +type: "SO:0001006" +source_database: "RefSeq" +cds_phase: "0" +e_value: 1.2e-15 +p_value: 0.001 +