Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 8 additions & 13 deletions .github/workflows/linkml_tasks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,11 @@ jobs:
make lint-no-warn
continue-on-error: true

# - name: Validate sample data against the schema
# id: test_sample_data
# run: |
# make test-sample-data
# continue-on-error: true

# - name: Validate sample data against JSONschema
# id: test_sample_data_jsonschema
# run: |
# make test-sample-data-jsonschema
# continue-on-error: true
- name: Run LinkML example files
id: run_linkml_examples
run: |
make test-examples
continue-on-error: true

- name: Test documentation generation
id: test_docgen
Expand All @@ -61,19 +55,20 @@ jobs:
# continue-on-error: true

- name: outcome failure
if: steps.lint_linkml.outcome != 'success' || steps.validate_linkml.outcome != 'success' || steps.test_docgen.outcome != 'success'
if: steps.lint_linkml.outcome != 'success' || steps.validate_linkml.outcome != 'success' || steps.run_linkml_examples.outcome != 'success' || steps.test_docgen.outcome != 'success'
# steps.test_sample_data.outcome != 'success' || steps.test_sample_data_jsonschema.outcome != 'success'

run: |
echo "linkml linting: ${{ steps.lint_linkml.outcome }}"
echo "linkml schema validation: ${{ steps.validate_linkml.outcome }}"
echo "linkml example runs: ${{ steps.run_linkml_examples.outcome }}"
echo "documentation generator: ${{ steps.test_docgen.outcome }}"
exit 1
# echo "sample data validation: ${{ steps.test_sample_data.outcome }}"
# echo "sample data JSON Schema validation: ${{ steps.test_sample_data_jsonschema.outcome }}"

- name: outcome success
if: steps.lint_linkml.outcome == 'success' && steps.validate_linkml.outcome == 'success' && steps.test_docgen.outcome == 'success'
# Each id below must match the `id:` declared on its step; the example-run step is `run_linkml_examples`.
if: steps.lint_linkml.outcome == 'success' && steps.validate_linkml.outcome == 'success' && steps.run_linkml_examples.outcome == 'success' && steps.test_docgen.outcome == 'success'
run: |
echo All tests passed successfully!
exit 0
23 changes: 7 additions & 16 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ LINKML_DIR = $(SRC_DIR)/linkml
JSONSCHEMA_DIR = $(SRC_DIR)/jsonschema
PYTHON_DIR = $(SRC_DIR)/cdm_schema
# sample data
SAMPLE_DATA_DIR = sample_data
# example inputs live under tests/data (e.g. tests/data/valid/*.yaml); used by the examples/% pattern rules
SAMPLE_DATA_DIR = tests/data

# unused
SHEET_MODULE = $(LINKML_SCHEMA_GOOGLE_SHEET_MODULE)
Expand Down Expand Up @@ -164,12 +164,6 @@ lint-validate: ## validate the schema; warnings or errors result in a non-zero
lint-validate-no-warn: ## validate the schema; warnings do not result in a non-zero exit code
$(RUN) linkml-lint --ignore-warnings --validate $(LINKML_DIR)

test-sample-data: ## validate sample data against LinkML schema
$(RUN) linkml-validate -s $(LINKML_SCHEMA_FILE) sample_data/**/**/*.json

test-sample-data-jsonschema: ## validate sample data against JSONschema
$(RUN) check-jsonschema --schemafile $(JSONSCHEMA_DIR)/$(SCHEMA_BASE_NAME).schema.json --verbose sample_data/**/**/*.json

check-config:
ifndef LINKML_SCHEMA_NAME
$(error **Project not configured**:\n\n - See '.env.public'\n\n)
Expand All @@ -187,18 +181,15 @@ examples/%.json: $(SAMPLE_DATA_DIR)/%.yaml
examples/%.ttl: $(SAMPLE_DATA_DIR)/%.yaml
$(RUN) linkml-convert -P EXAMPLE=http://example.org/ -s $(LINKML_SCHEMA_FILE) -C $(SCHEMA_ROOT) $< -o $@

test-examples: examples/output

examples/output: src/$(SCHEMA_NAME)/schema/$(SCHEMA_NAME).yaml
mkdir -p $@
# Run linkml-run-examples over the example inputs and capture its report as examples/output/README.md.
# NOTE(review): nothing in this rule creates examples/output, so the redirect relies on that
# directory already existing; the input directories are also spelled out as tests/data/...
# rather than derived from $(SAMPLE_DATA_DIR) -- confirm both are intentional.
test-examples:
$(RUN) linkml-run-examples \
--output-formats json \
--input-formats yaml \
--output-formats yaml \
--counter-example-input-directory $(SAMPLE_DATA_DIR)/invalid \
--input-directory $(SAMPLE_DATA_DIR)/valid \
--output-directory $@ \
--schema $< > $@/README.md

--counter-example-input-directory tests/data/invalid \
--input-directory tests/data/valid \
--output-directory examples/output \
--schema $(LINKML_SCHEMA_FILE) > examples/output/README.md

serve: mkd-serve ## Test documentation locally

Expand Down
37 changes: 37 additions & 0 deletions examples/output/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
## Feature
### Input
```yaml
cds_phase: '0'
e_value: 1.2e-15
end: 175000
feature_id: CDM:f47ac10b-58cc-4372-a567-0e02b2c3d479
hash: 5d41402abc4b2a76b9719d911017c592
p_value: 0.001
source_database: RefSeq
start: 125000
strand: positive
type: SO:0001006

```
## Feature-example2
### Input
```yaml
end: 40484
feature_id: SP:AA_WF_I-V_contig_20285
hash: 7d793037a0760186574b0282f2f435e7
p_value: 0.9953
start: 1
type: SO:0000155

```
## Feature-example3
### Input
```yaml
end: 11663
feature_id: PV:AA_WF_I-V_contig_3062_provirus_3_11663
hash: 3c6e0b8a9c15224a8228b9a98ca1531d
p_value: 0.9805
start: 3
type: SO:0001006

```
44 changes: 44 additions & 0 deletions tests/data/plasmid-raw-data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
seq_name length topology n_genes genetic_code plasmid_score fdr n_hallmarks marker_enrichment conjugation_genes amr_genes
AA_WF_I-V_contig_20285 6313 No terminal repeats 5 11 0.9956 NA 0 4.6079 NA NA
AA_WF_I-V_contig_165867 1322 No terminal repeats 2 11 0.9956 NA 2 3.2077 MOBQ NA
AA_WF_I-V_contig_163871 1333 No terminal repeats 2 11 0.9955 NA 2 3.0908 NA NA
AA_WF_I-V_contig_2765 40484 No terminal repeats 45 11 0.9953 NA 12 34.9604 T_virB2;T_virB11;MOBP1;T_virB5;T_virB10;T_virB9;T_virB8;virb4 NA
AA_WF_I-V_contig_22984 5691 No terminal repeats 4 11 0.9952 NA 1 4.889 NA NA
AA_WF_I-V_contig_20770 6179 No terminal repeats 5 11 0.9952 NA 0 5.9478 NA NA
AA_WF_I-V_contig_72789 2361 No terminal repeats 6 11 0.995 NA 3 5.6278 MOBQ NA
AA_WF_I-V_contig_32632 4304 No terminal repeats 6 11 0.995 NA 1 9.0599 MOBP1 NA
AA_WF_I-V_contig_9167 12808 No terminal repeats 11 11 0.9949 NA 0 8.693 NA NA
AA_WF_I-V_contig_21980 5903 No terminal repeats 8 11 0.9948 NA 1 12.73 NA NA
AA_WF_I-V_contig_46174 3309 No terminal repeats 3 11 0.9947 NA 1 3.6934 NA NA
AA_WF_I-V_contig_62445 2645 No terminal repeats 3 11 0.9946 NA 0 4.3487 NA NA
AA_WF_I-V_contig_4297 26024 No terminal repeats 23 11 0.9946 NA 14 21.2045 F_traG;F_traH;F_traF;F_trbC;F_trbC;F_traU;F_traW;F_traV;F_traK;F_traE;F_traL NA
AA_WF_I-V_contig_114914 1704 No terminal repeats 2 11 0.9945 NA 2 2.1114 NA NA
AA_WF_I-V_contig_3457 32153 No terminal repeats 33 11 0.9945 NA 2 14.7417 NA NA
AA_WF_I-V_contig_351593 796 No terminal repeats 2 11 0.9944 NA 2 3.2261 MOBQ NA
AA_WF_I-V_contig_22126 5873 No terminal repeats 6 11 0.9943 NA 0 4.4871 NA NA
AA_WF_I-V_contig_15225 8075 No terminal repeats 10 11 0.9943 NA 0 6.9264 NA NA
AA_WF_I-V_contig_63052 2625 No terminal repeats 4 11 0.9942 NA 0 4.3181 NA NA
AA_WF_I-V_contig_23035 5681 No terminal repeats 11 11 0.9941 NA 0 0.855 NA NA
AA_WF_I-V_contig_38397 3800 No terminal repeats 5 11 0.994 NA 2 5.2557 MOBP1 NA
AA_WF_I-V_contig_95329 1946 No terminal repeats 3 11 0.994 NA 2 4.4089 virb4 NA
AA_WF_I-V_contig_182228 1240 No terminal repeats 2 11 0.994 NA 2 2.9387 MOBP1 NA
AA_WF_I-V_contig_12924 9375 No terminal repeats 14 11 0.9939 NA 3 11.7303 virb4;T_virB3;T_virB2 NA
AA_WF_I-V_contig_31591 4411 No terminal repeats 6 11 0.9938 NA 2 4.112 I_traI;I_traK NA
AA_WF_I-V_contig_18820 6724 No terminal repeats 7 11 0.9938 NA 1 5.4064 NA NA
AA_WF_I-V_contig_39353 3734 No terminal repeats 2 11 0.9938 NA 0 2.9763 NA NA
AA_WF_I-V_contig_3108 35777 No terminal repeats 35 11 0.9937 NA 15 25.5812 F_traG;F_traH;F_traF;F_traN;F_trbC;F_traU;F_traW;F_traV;F_traK;F_traE;F_traL NF012174
AA_WF_I-V_contig_155267 1383 No terminal repeats 3 11 0.9936 NA 2 3.4366 I_traP;I_traQ NA
AA_WF_I-V_contig_57102 2821 No terminal repeats 3 11 0.9936 NA 0 4.6092 NA NA
AA_WF_I-V_contig_1487 73787 DTR 67 11 0.9936 NA 5 21.4529 MOBF NA
AA_WF_I-V_contig_517 180142 DTR 202 11 0.9936 NA 14 74.5763 T_virB2;T_virB3;virb4;T_virB6;T_virB8;T_virB9;T_virB10;MOBP1;MOBP1 NF012178;NF033105
AA_WF_I-V_contig_88773 2047 No terminal repeats 4 11 0.9935 NA 1 4.8714 T_virB1 NA

# att/value pairs of use: topology
# att/value pairs of use:n_genes
# att/value pairs of use: genetic_code
# att/value pairs of use:plasmid_score
# att/value pairs of use:fdr
# att/value pairs of use: n_hallmarks
# att/value pairs of use: marker_enrichment
# att/value pairs of use:conjugation_genes
# att/value pairs of use:amr_genes
28 changes: 28 additions & 0 deletions tests/data/prophage-raw-data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
seq_name length topology coordinates n_genes genetic_code virus_score fdr n_hallmarks marker_enrichment taxonomy
AA_WF_I-V_contig_10560 11284 No terminal repeats NA 8 11 0.9837 NA 4 13.6837 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;Autographiviridae
AA_WF_I-V_contig_18546 6810 No terminal repeats NA 5 11 0.9831 NA 1 7.8586 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;
AA_WF_I-V_contig_14483 8433 No terminal repeats NA 7 11 0.9822 NA 1 9.5757 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;
AA_WF_I-V_contig_19370 6561 No terminal repeats NA 10 11 0.9821 NA 1 10.7279 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;
AA_WF_I-V_contig_4559 24602 No terminal repeats NA 21 11 0.9819 NA 8 25.5093 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;Autographiviridae
AA_WF_I-V_contig_23972 5496 No terminal repeats NA 8 11 0.9819 NA 2 9.9477 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;
AA_WF_I-V_contig_27267 4960 No terminal repeats NA 9 11 0.9812 NA 3 14.2375 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;
AA_WF_I-V_contig_40677 3640 No terminal repeats NA 6 11 0.9811 NA 1 7.8946 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;
AA_WF_I-V_contig_2760 40546 No terminal repeats NA 75 11 0.9808 NA 5 52.012 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;
AA_WF_I-V_contig_7019 16346 No terminal repeats NA 17 11 0.9808 NA 3 17.042 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;Autographiviridae
AA_WF_I-V_contig_16924 7381 No terminal repeats NA 5 11 0.9807 NA 1 8.3873 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;
AA_WF_I-V_contig_24076 5475 No terminal repeats NA 11 11 0.9806 NA 1 14.4464 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;
AA_WF_I-V_contig_3062|provirus_3_11663 11661 Provirus 3-11663 12 11 0.9805 NA 4 17.3246 Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;;


# ContigCollection table has NCBITaxon as a slot, value is: NCBITaxon:10239,
# ContigCollection has loads of properties; the only required one at the moment is id. Recommended is cds_phase:
# RefSeq, etc. reusable id, but if not, can make up a hash.
# For this example, we need a ContigCollection_X_Feature object here to relate the ContigCollection to the Feature
# key/value pairs: 17.3246 geNomad score
# key/value pairs: Viruses;Duplodnaviria;Heunggongvirae;Uroviricota;Caudoviricetes;
# Simon says most of the time is a NCBITaxon because geNomad uses that for classification, but can get out of sync
# better to capture what we have.
# ContigCollection also has a required "type" slot: https://kbase.github.io/cdm-schema/ContigCollectionType/
# KBase has a type here because they/we can make an arbitrary (or not) collection of contigs. this type field helps
# to group contig collections by how they were generated. (e.g. metagenome means the collection of contigs came out of a
# sequencing project that was metagenomic, etc.).
6 changes: 6 additions & 0 deletions tests/data/valid/Feature-example2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Example Feature record for a plasmid contig; values appear to come from tests/data/plasmid-raw-data.txt.
# NOTE(review): in that file contig_20285 has length 6313 and plasmid_score 0.9956, whereas the
# end (40484) and p_value (0.9953) below match the contig_2765 row -- confirm which contig is intended.
feature_id: SP:AA_WF_I-V_contig_20285 # SP = someplasmid
hash: "7d793037a0760186574b0282f2f435e7"
start: 1
end: 40484
type: "SO:0000155" # Sequence Ontology term for plasmid
p_value: 0.9953 # taken from the plasmid_score column
6 changes: 6 additions & 0 deletions tests/data/valid/Feature-example3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Example Feature record for the provirus row (coordinates 3-11663) of tests/data/prophage-raw-data.txt.
feature_id: PV:AA_WF_I-V_contig_3062_provirus_3_11663 # PV = provirus; a prophage is a kind of provirus.
hash: "3c6e0b8a9c15224a8228b9a98ca1531d"
start: 3
end: 11663
p_value: 0.9805 # taken from the virus_score column -- TODO confirm this is the right slot for it
type: "SO:0001006" # SO:0001006 is the prophage term; no provirus term was found in SO, so this is a stand-in the author flags as WRONG.
11 changes: 11 additions & 0 deletions tests/data/valid/Feature.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
feature_id: "CDM:f47ac10b-58cc-4372-a567-0e02b2c3d479"
hash: "5d41402abc4b2a76b9719d911017c592"
start: 125000
end: 175000
strand: positive
type: "SO:0001006"
source_database: "RefSeq"
cds_phase: "0"
e_value: 1.2e-15
p_value: 0.001

Loading