From 21948ed685d1d3354b08b5269c9e1fc6c53816f8 Mon Sep 17 00:00:00 2001 From: Javi Corvi Date: Mon, 29 Nov 2021 16:05:51 +0100 Subject: [PATCH 1/6] new version, removing ades component and relation extraction --- .../pretox-dictionaries/dose_unit.lst | 20 ++++ .../group_qualification_basic.lst | 7 ++ .../is_treatment_related_triggers.lst | 110 ++++++++++++++++++ dictionaries/pretox-dictionaries/lists.def | 4 + dictionaries/pretox-dictionaries/sex.lst | 15 +++ .../cdisc_pkunit_dose_unit_not_used.jape | 36 ++++++ .../cdisc-etox/etox_ilo_trigger_mapping.jape | 3 +- jape_rules/cdisc-etox/etox_send_mapping.jape | 4 +- jape_rules/main.jape | 13 ++- .../treatment_related_negation.jape | 39 +++++++ .../relation-extraction/finding_dose.jape | 34 ++++++ .../relation-extraction/finding_group.jape | 28 +++++ .../finding_is_treatment_related.jape | 28 +++++ .../relation-extraction/finding_sex.jape | 29 +++++ .../relation-extraction/finding_specimen.jape | 29 +++++ jape_rules/rules-matching/dose_mapping.jape | 32 +++++ .../dose_qualification_rule.jape | 21 ++-- .../group_basic_qualifier_mapping.jape | 43 +++++++ .../group_dose_qualifier_mapping.jape | 39 +++++++ jape_rules/rules-matching/group_mapping.jape | 42 +++++++ .../statistical_significance_mapping.jape | 37 ++++++ 21 files changed, 598 insertions(+), 15 deletions(-) create mode 100644 dictionaries/pretox-dictionaries/dose_unit.lst create mode 100644 dictionaries/pretox-dictionaries/group_qualification_basic.lst create mode 100644 dictionaries/pretox-dictionaries/is_treatment_related_triggers.lst create mode 100644 dictionaries/pretox-dictionaries/sex.lst create mode 100644 jape_rules/cdisc-etox/cdisc_pkunit_dose_unit_not_used.jape create mode 100644 jape_rules/pretox-dictionaries/treatment_related_negation.jape create mode 100644 jape_rules/relation-extraction/finding_dose.jape create mode 100644 jape_rules/relation-extraction/finding_group.jape create mode 100644 jape_rules/relation-extraction/finding_is_treatment_related.jape create mode 100644 jape_rules/relation-extraction/finding_sex.jape create mode 100644 jape_rules/relation-extraction/finding_specimen.jape create mode 100644 jape_rules/rules-matching/dose_mapping.jape rename jape_rules/{postprocessing-and-lexical-rules-matching => rules-matching}/dose_qualification_rule.jape (62%) create mode 100644 jape_rules/rules-matching/group_basic_qualifier_mapping.jape create mode 100644 jape_rules/rules-matching/group_dose_qualifier_mapping.jape create mode 100644 jape_rules/rules-matching/group_mapping.jape create mode 100644 jape_rules/rules-matching/statistical_significance_mapping.jape diff --git a/dictionaries/pretox-dictionaries/dose_unit.lst b/dictionaries/pretox-dictionaries/dose_unit.lst new file mode 100644 index 0000000..e16535e --- /dev/null +++ b/dictionaries/pretox-dictionaries/dose_unit.lst @@ -0,0 +1,20 @@ +g +mg +kg +lb +ml +dg +ng +mcg +mcl +mg/kg +mg/kg/day +mg/kg/dose +mg/kg/d +g/day +mcmol/g +mg/m +pg/kg +pmol/kg +μmol Gd/kg bw +ug/ml \ No newline at end of file diff --git a/dictionaries/pretox-dictionaries/group_qualification_basic.lst b/dictionaries/pretox-dictionaries/group_qualification_basic.lst new file mode 100644 index 0000000..a8e2128 --- /dev/null +++ b/dictionaries/pretox-dictionaries/group_qualification_basic.lst @@ -0,0 +1,7 @@ +control +treated +treatment +dose +dosed +compound-treated +main \ No newline at end of file diff --git a/dictionaries/pretox-dictionaries/is_treatment_related_triggers.lst b/dictionaries/pretox-dictionaries/is_treatment_related_triggers.lst new file mode 100644 index 0000000..8d5ca75 --- /dev/null +++ b/dictionaries/pretox-dictionaries/is_treatment_related_triggers.lst @@ -0,0 +1,110 @@ +treatment related finding SEND_CODE=Y +treatment related findings SEND_CODE=Y +treatment related effect SEND_CODE=Y +treatment-related effect SEND_CODE=Y +treatment-related effects SEND_CODE=Y +treatment related effects SEND_CODE=Y +treatment-related-finding SEND_CODE=Y +treatment-related-findings SEND_CODE=Y +related effects SEND_CODE=Y +relevant effect SEND_CODE=Y +caused findings SEND_CODE=Y +relevants effects SEND_CODE=Y +relevants effect SEND_CODE=Y +relevant effects SEND_CODE=Y +related effect SEND_CODE=Y +adverse effects SEND_CODE=Y +dose related SEND_CODE=Y +dose-related SEND_CODE=Y +related to dose SEND_CODE=Y +compound-related effect SEND_CODE=Y +compound-related effects SEND_CODE=Y +compound- related SEND_CODE=Y +compound-related SEND_CODE=Y +compound related SEND_CODE=Y +compound related findings SEND_CODE=Y +compound related finding SEND_CODE=Y +compound-related findings SEND_CODE=Y +compound-related finding SEND_CODE=Y +substance-related SEND_CODE=Y +substance- related SEND_CODE=Y +substance related SEND_CODE=Y +compound effect SEND_CODE=Y +compound effects SEND_CODE=Y +effect of the compound SEND_CODE=Y +effects of the compound SEND_CODE=Y +effects of the compounds SEND_CODE=Y +effect of the compounds SEND_CODE=Y +toxicological findings SEND_CODE=Y +toxicological finding SEND_CODE=Y +elicit any overt toxicity SEND_CODE=Y +elicit any toxicity SEND_CODE=Y +elicit toxicity SEND_CODE=Y +overt toxicity SEND_CODE=Y +considered to be toxicologically SEND_CODE=Y +considered to be toxicological SEND_CODE=Y +toxicologically relevant SEND_CODE=Y +toxicological relevant findings SEND_CODE=Y +toxicological relevant finding SEND_CODE=Y +related to the pharmacological activity of the compound SEND_CODE=Y +affected by the test compound SEND_CODE=Y +affected by the compound SEND_CODE=Y +affected by the test compounds SEND_CODE=Y +affected by the compounds SEND_CODE=Y +related effect SEND_CODE=Y +related effect found SEND_CODE=Y +treatment-related SEND_CODE=Y +treatment related SEND_CODE=Y +further finding SEND_CODE=Y +related findings SEND_CODE=Y +related finding SEND_CODE=Y +effect of treatment SEND_CODE=Y +regarded as adverse effect SEND_CODE=Y +regarded as adverse effects SEND_CODE=Y +were regarded or suspected to be treatment-related SEND_CODE=Y +were regarded to be treatment-related SEND_CODE=Y +were considered to be treatment-related SEND_CODE=Y +suspected to be treatment-related SEND_CODE=Y +attributable to treatment SEND_CODE=Y +administration of the compound SEND_CODE=Y +findings attributable to treatment SEND_CODE=Y +findings attributable to compound SEND_CODE=Y +finding attributable to treatment SEND_CODE=Y +finding attributable to compound SEND_CODE=Y +effect attributable to treatment SEND_CODE=Y +effects attributable to treatment SEND_CODE=Y +effect attributable to compound SEND_CODE=Y +effects attributable to compound SEND_CODE=Y +toxic effect SEND_CODE=Y +toxic effects SEND_CODE=Y +test article-related SEND_CODE=Y +no compound-related effect SEND_CODE=N +not treatment-related SEND_CODE=N +no compound-related effect SEND_CODE=N +not compound-related SEND_CODE=N +rather than a treatment-related effect SEND_CODE=N +not considered as a compound-related effect SEND_CODE=N +no evidence for treatment-related effects SEND_CODE=N +no evidence for treatment-related effect SEND_CODE=N +treatment-related effect is not assumed SEND_CODE=N +no related effect SEND_CODE=N +unaffected by treatment SEND_CODE=N +no related effect found SEND_CODE=N +no treatment-related SEND_CODE=N +no treatment related SEND_CODE=N +no further finding SEND_CODE=N +no related findings SEND_CODE=N +no related finding SEND_CODE=N +no relevant compound-related effect SEND_CODE=N +no relevant compound-related effects SEND_CODE=N +not suspicious for a treatment-related effect SEND_CODE=N +unrelated to the treatment SEND_CODE=N +not to be treatment-related SEND_CODE=N +not regarded to be treatment-related SEND_CODE=N +not regarded as being treatment-related SEND_CODE=N +doubtful toxicological significance SEND_CODE=N +reaction is rather unremarkable SEND_CODE=N +unaffected by the test treatment SEND_CODE=N +unaffected by the treatment SEND_CODE=N +uncertain SEND_CODE=U +incidental SEND_CODE=I \ No newline at end of file diff --git a/dictionaries/pretox-dictionaries/lists.def b/dictionaries/pretox-dictionaries/lists.def index 8eb0d10..85de4f1 100644 --- a/dictionaries/pretox-dictionaries/lists.def +++ b/dictionaries/pretox-dictionaries/lists.def @@ -4,4 +4,8 @@ risk_level.lst:MY_ONTOLOGY:RISK_LEVEL sdomain.lst:MY_ONTOLOGY:STUDY_DOMAIN findings.lst:MY_ONTOLOGY:FINDING study_testcd.lst:MY_ONTOLOGY:STUDY_TESTCD +sex.lst:MY_ONTOLOGY:SEX negation.lst:MY_ONTOLOGY:NEGATION +is_treatment_related_triggers.lst:MY_ONTOLOGY:IS_TREATMENT_RELATED_TRIGGER +dose_unit.lst:MY_ONTOLOGY:DOSE_UNIT +group_qualification_basic.lst:MY_ONTOLOGY:BASIC_GROUP_QUALIFICATION \ No newline at end of file diff --git a/dictionaries/pretox-dictionaries/sex.lst b/dictionaries/pretox-dictionaries/sex.lst new file mode 100644 index 0000000..d854f7f --- /dev/null +++ b/dictionaries/pretox-dictionaries/sex.lst @@ -0,0 +1,15 @@ +feminine MANUAL_CODELIST=C66731 MANUAL_CODELIST_ID=C16576 CDISC_SEND_CODE=F +feminines MANUAL_CODELIST=C66731 MANUAL_CODELIST_ID=C16576 CDISC_SEND_CODE=F +female MANUAL_CODELIST=C66731 MANUAL_CODELIST_ID=C16576 CDISC_SEND_CODE=F +females MANUAL_CODELIST=C66731 MANUAL_CODELIST_ID=C16576 CDISC_SEND_CODE=F +masculine MANUAL_CODELIST=C66731 MANUAL_CODELIST_ID=C20197 CDISC_SEND_CODE=M +macho MANUAL_CODELIST=C66731 MANUAL_CODELIST_ID=C20197 CDISC_SEND_CODE=M +machos MANUAL_CODELIST=C66731 MANUAL_CODELIST_ID=C20197 CDISC_SEND_CODE=M +male MANUAL_CODELIST=C66731 MANUAL_CODELIST_ID=C20197 CDISC_SEND_CODE=M +males MANUAL_CODELIST=C66731 MANUAL_CODELIST_ID=C20197 CDISC_SEND_CODE=M +both sexes MANUAL_CODELIST=C66732 MANUAL_CODELIST_ID=C49636 CDISC_SEND_CODE=B +both sex MANUAL_CODELIST=C66732 MANUAL_CODELIST_ID=C49636 CDISC_SEND_CODE=B +both sexs MANUAL_CODELIST=C66732 MANUAL_CODELIST_ID=C49636 CDISC_SEND_CODE=B +male and famale MANUAL_CODELIST=C66732 MANUAL_CODELIST_ID=C49636 CDISC_SEND_CODE=B +male and female MANUAL_CODELIST=C66732 MANUAL_CODELIST_ID=C49636 CDISC_SEND_CODE=B +males and females MANUAL_CODELIST=C66732 MANUAL_CODELIST_ID=C49636 CDISC_SEND_CODE=B \ No newline at end of file diff --git a/jape_rules/cdisc-etox/cdisc_pkunit_dose_unit_not_used.jape b/jape_rules/cdisc-etox/cdisc_pkunit_dose_unit_not_used.jape new file mode 100644 index 0000000..7cdcda9 --- /dev/null +++ b/jape_rules/cdisc-etox/cdisc_pkunit_dose_unit_not_used.jape @@ -0,0 +1,36 @@ +Imports: { +import static gate.Utils.*; +} +Phase:firstphase +Input: Lookup +Options: control = appelt + +Rule: cdisc_pkunit_dose_unit +( +{Lookup.minorType=="SEND_CIDSC", Lookup.CDISC_CODELIST=="PKUNIT"} +) +:cdisc_pkunit_dose_unit +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("cdisc_pkunit_dose_unit"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + FeatureMap lookupFeatures = ann.getFeatures(); + String label = lookupFeatures.get("LABEL").toString(); + String content = stringFor(doc, ann); + gate.FeatureMap features = Factory.newFeatureMap(); + features.put("text",content); + features.put("SOURCE","CDISC"); + features.put("RULE","cdisc_pkunit_dose_unit"); + features.putAll(lookupFeatures); + features.remove("majorType"); + features.remove("minorType"); + + try{ + outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "DOSE_UNIT_CDISC", features); + }catch(InvalidOffsetException e){ + throw new LuckyException(e); + } + //remove old lookup + inputAS.remove(ann); + +} \ No newline at end of file diff --git a/jape_rules/cdisc-etox/etox_ilo_trigger_mapping.jape b/jape_rules/cdisc-etox/etox_ilo_trigger_mapping.jape index 9a46574..02345df 100644 --- a/jape_rules/cdisc-etox/etox_ilo_trigger_mapping.jape +++ b/jape_rules/cdisc-etox/etox_ilo_trigger_mapping.jape @@ -28,7 +28,8 @@ Rule: etox_ilo_trigger_mapping features.putAll(lookupFeatures); try{ - outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "NO_TREATMENT_RELATED_TRIGGER", features); + //in these case from etox this means no finding detection, normal things and so on. This is not a no treatment_related_trigger + outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "NO_FINDING_TRIGGER", features); }catch(InvalidOffsetException e){ throw new LuckyException(e); } diff --git a/jape_rules/cdisc-etox/etox_send_mapping.jape b/jape_rules/cdisc-etox/etox_send_mapping.jape index d36f4b8..7ebb31c 100644 --- a/jape_rules/cdisc-etox/etox_send_mapping.jape +++ b/jape_rules/cdisc-etox/etox_send_mapping.jape @@ -46,14 +46,14 @@ Rule: etox_send_mapping //remove old lookup inputAS.remove(ann); }else if(label.equals("SEXPOP")){ - if(content.length()==1) { + /*if(content.length()==1) { features.put("abrev", "true"); } try{ outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "SEX", features); }catch(InvalidOffsetException e){ throw new LuckyException(e); - } + }*/ //remove old lookup inputAS.remove(ann); }else if(label.equals("STRAIN")){ diff --git a/jape_rules/main.jape b/jape_rules/main.jape index db3cf78..c55cf7d 100644 --- a/jape_rules/main.jape +++ b/jape_rules/main.jape @@ -12,6 +12,12 @@ cdisc-etox/etox_send_mapping cdisc-etox/etox_moa_mapping pretox-dictionaries/basic_mapping pretox-dictionaries/remove_false_negation +pretox-dictionaries/treatment_related_negation +rules-matching/dose_mapping +rules-matching/dose_qualification_rule +rules-matching/group_mapping +rules-matching/group_dose_qualifier_mapping +rules-matching/group_basic_qualifier_mapping limtox-hepatoxicity-dictionaries/basic_mapping postprocessing-and-lexical-rules-matching/remove_dnorm_false_positives postprocessing-and-lexical-rules-matching/stop_words @@ -23,7 +29,7 @@ postprocessing-and-lexical-rules-matching/merge_annotations postprocessing-and-lexical-rules-matching/merge_hepatotoxicity postprocessing-and-lexical-rules-matching/merge_livermarker postprocessing-and-lexical-rules-matching/remove_study_domain -postprocessing-and-lexical-rules-matching/dose_qualification_rule + postprocessing-and-lexical-rules-matching/remove_manifestation_findings_false_positive_no_finding postprocessing-and-lexical-rules-matching/study_testcd_manifestation_finding postprocessing-and-lexical-rules-matching/study_testcd_over_finding @@ -36,4 +42,9 @@ postprocessing-and-lexical-rules-matching/negation_finding relation-extraction/dose_sex relation-extraction/dose_sex_multi relation-extraction/finding_dose_sex +relation-extraction/finding_is_treatment_related +relation-extraction/finding_group +relation-extraction/finding_dose +relation-extraction/finding_sex +relation-extraction/finding_specimen delete_lookups \ No newline at end of file diff --git a/jape_rules/pretox-dictionaries/treatment_related_negation.jape b/jape_rules/pretox-dictionaries/treatment_related_negation.jape new file mode 100644 index 0000000..6ac5b60 --- /dev/null +++ b/jape_rules/pretox-dictionaries/treatment_related_negation.jape @@ -0,0 +1,39 @@ +Imports: { +import static gate.Utils.*; +} +Phase:secondphase +Input: NEGATION IS_TREATMENT_RELATED_TRIGGER Token +Options: control = appelt +Rule: treatment_related_negation +( +{NEGATION} ({Token})[0,3] {IS_TREATMENT_RELATED_TRIGGER.SEND_CODE=="Y"} +) :treatment_related_negation +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("treatment_related_negation"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + gate.Annotation sentence = sentence_set.iterator().next(); + //System.out.println("treatment_related_negation"); + //System.out.println(stringFor(doc, sentence)); + try{ + gate.FeatureMap features = Factory.newFeatureMap(); + String content = stringFor(doc, lookup.firstNode().getOffset(),lookup.lastNode().getOffset()); + features.put("text",content); + features.put("rule", "treatment_related_negation"); + features.put("SEND_CODE","N"); + outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "IS_TREATMENT_RELATED_TRIGGER", features); + //outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "NO_TREATMENT_RELATED_TRIGGER", features); + //tengo que eliminar el trigger de treatment related + + gate.AnnotationSet to_remove = inputAS.get("IS_TREATMENT_RELATED_TRIGGER", lookup.firstNode().getOffset(), lookup.lastNode().getOffset()); + for (Annotation rem : to_remove) { + if(rem.getFeatures().get("SEND_CODE").toString().equals("Y")){ + outputAS.remove(rem); + } + } + }catch(InvalidOffsetException e){ + throw new LuckyException(e); + } + +} \ No newline at end of file diff --git a/jape_rules/relation-extraction/finding_dose.jape b/jape_rules/relation-extraction/finding_dose.jape new file mode 100644 index 0000000..73c328b --- /dev/null +++ b/jape_rules/relation-extraction/finding_dose.jape @@ -0,0 +1,34 @@ +//this rule generate the relations between the finding and the dose sex entities +Imports: { +import static gate.Utils.*; +import es.bsc.inb.nlp.gate.generic.component.util.*; +} +Phase:secondphase +Input: FINDING STUDY_TESTCD +Options: control = appelt +Rule: finding_dose +( +{FINDING} | {STUDY_TESTCD} +) :finding_dose +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("finding_dose"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + gate.Annotation sentence = sentence_set.iterator().next(); + + //only if there is no dose_sex and dose_sex_multi + + //fields of the sentence + AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); + + Annotation dose_annotation = AnnotationUtil.getClosestAnnotation(sentenceFields, ann, "DOSE", AnnotationUtil.right_limit, AnnotationUtil.left_limit, null); + if(dose_annotation!=null){ + ann.getFeatures().put("DOSE", dose_annotation.getId()); + }else{ + Annotation dose_q_annotation = AnnotationUtil.getClosestAnnotation(sentenceFields, ann, "DOSE_QUALIFICATION", AnnotationUtil.right_limit, AnnotationUtil.left_limit, null); + if(dose_q_annotation!=null){ + ann.getFeatures().put("DOSE_QUALIFICATION", dose_q_annotation.getId()); + } + } +} \ No newline at end of file diff --git a/jape_rules/relation-extraction/finding_group.jape b/jape_rules/relation-extraction/finding_group.jape new file mode 100644 index 0000000..22c4043 --- /dev/null +++ b/jape_rules/relation-extraction/finding_group.jape @@ -0,0 +1,28 @@ +//this rule generate the relations between the finding and the dose sex entities +Imports: { +import static gate.Utils.*; +import es.bsc.inb.nlp.gate.generic.component.util.*; +} +Phase:secondphase +Input: FINDING STUDY_TESTCD +Options: control = appelt +Rule: finding_group +( +{FINDING} | {STUDY_TESTCD} +) :finding_group +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("finding_group"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + gate.Annotation sentence = sentence_set.iterator().next(); + + //fields of the sentence + AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); + Annotation group_annotation = AnnotationUtil.getClosestAnnotation(sentenceFields, ann, "GROUP", AnnotationUtil.right_limit, AnnotationUtil.left_limit, null); + + if(group_annotation!=null){ + ann.getFeatures().put("GROUP", group_annotation.getId()); + } + +} \ No newline at end of file diff --git a/jape_rules/relation-extraction/finding_is_treatment_related.jape b/jape_rules/relation-extraction/finding_is_treatment_related.jape new file mode 100644 index 0000000..ff5de5d --- /dev/null +++ b/jape_rules/relation-extraction/finding_is_treatment_related.jape @@ -0,0 +1,28 @@ +//this rule generate the relations between the finding and the dose sex entities +Imports: { +import static gate.Utils.*; +import es.bsc.inb.nlp.gate.generic.component.util.*; +} +Phase:secondphase +Input: FINDING STUDY_TESTCD +Options: control = appelt +Rule: finding_is_treatment_related +( +{FINDING} | {STUDY_TESTCD} +) :finding_is_treatment_related +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("finding_is_treatment_related"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + gate.Annotation sentence = sentence_set.iterator().next(); + + //fields of the sentence + AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); + Annotation treatment_related_annotation = AnnotationUtil.getClosestAnnotation(sentenceFields, ann, "IS_TREATMENT_RELATED_TRIGGER", AnnotationUtil.right_limit, AnnotationUtil.left_limit, null); + + if(treatment_related_annotation!=null){ + ann.getFeatures().put("IS_TREATMENT_RELATED", treatment_related_annotation.getId()); + } + +} \ No newline at end of file diff --git a/jape_rules/relation-extraction/finding_sex.jape b/jape_rules/relation-extraction/finding_sex.jape new file mode 100644 index 0000000..e486e9c --- /dev/null +++ b/jape_rules/relation-extraction/finding_sex.jape @@ -0,0 +1,29 @@ +//this rule generate the relations between the finding and the dose sex entities +Imports: { +import static gate.Utils.*; +import es.bsc.inb.nlp.gate.generic.component.util.*; +} +Phase:secondphase +Input: FINDING STUDY_TESTCD +Options: control = appelt +Rule: finding_sex +( +{FINDING} | {STUDY_TESTCD} +) :finding_sex +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("finding_sex"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + gate.Annotation sentence = sentence_set.iterator().next(); + + //only if there is no dose_sex and dose_sex_multi + + //fields of the sentence + AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); + + Annotation dose_annotation = AnnotationUtil.getClosestAnnotation(sentenceFields, ann, "SEX", AnnotationUtil.right_limit, AnnotationUtil.left_limit, null); + if(dose_annotation!=null){ + ann.getFeatures().put("SEX", dose_annotation.getId()); + } +} \ No newline at end of file diff --git a/jape_rules/relation-extraction/finding_specimen.jape b/jape_rules/relation-extraction/finding_specimen.jape new file mode 100644 index 0000000..8529aeb --- /dev/null +++ b/jape_rules/relation-extraction/finding_specimen.jape @@ -0,0 +1,29 @@ +//this rule generate the relations between the finding and the dose sex entities +Imports: { +import static gate.Utils.*; +import es.bsc.inb.nlp.gate.generic.component.util.*; +} +Phase:secondphase +Input: FINDING STUDY_TESTCD +Options: control = appelt +Rule: finding_specimen +( +{FINDING} | {STUDY_TESTCD} +) :finding_specimen +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("finding_specimen"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + gate.Annotation sentence = sentence_set.iterator().next(); + + + + //fields of the sentence + AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); + + Annotation specimen_annotation = AnnotationUtil.getClosestAnnotation(sentenceFields, ann, "SPECIMEN", 15, 15, ";"); + if(specimen_annotation!=null){ + ann.getFeatures().put("SPECIMEN", specimen_annotation.getId()); + } +} \ No newline at end of file diff --git a/jape_rules/rules-matching/dose_mapping.jape b/jape_rules/rules-matching/dose_mapping.jape new file mode 100644 index 0000000..38539ec --- /dev/null +++ b/jape_rules/rules-matching/dose_mapping.jape @@ -0,0 +1,32 @@ +Imports: { +import static gate.Utils.*; +} +Phase:secondphase +Input: Token DOSE_UNIT +Options: control = appelt +Rule: dose_mapping +( +({Token.kind=="NUMBER"} | {Token.kind=="DATE"}) (({Token.root == "and"} | {Token.root == "or"} | {Token.root == "to"} | {Token.word =~"[,-]"}) ({Token.kind=="NUMBER"} | {Token.kind=="DATE"}))[0,4] ({DOSE_UNIT} | ({Token} {Token.word=="/"} {Token}) | ({Token} {Token.word=="/"} {Token} {Token.word=="/"} {Token})) +) :dose_mapping +--> +{ + //$DOSE_SEP = "/,|-|and|or|to/" + //DOSE_QUANTITY {(( ([{ner:NUMBER} | {word::IS_NUM}] [{ word:$DOSE_SEP }])* [{ner:NUMBER} | {word::IS_NUM}] [{word:$DOSE_SEP}]* [{ner:DOSE_UNIT} | {ner:DOSE_UNIT_MANUAL} | word:/\D+\/\D+/ | word:/\D+\/\D+\/\D+/] )) => "DOSE" } + + + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("dose_mapping"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + //gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + //gate.Annotation sentence = sentence_set.iterator().next(); + try{ + gate.FeatureMap features = Factory.newFeatureMap(); + String content = stringFor(doc, lookup.firstNode().getOffset(),lookup.lastNode().getOffset()); + features.put("text",content); + features.put("rule", "dose_mapping"); + outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "DOSE", features); + + }catch(InvalidOffsetException e){ + throw new LuckyException(e); + } + +} \ No newline at end of file diff --git a/jape_rules/postprocessing-and-lexical-rules-matching/dose_qualification_rule.jape b/jape_rules/rules-matching/dose_qualification_rule.jape similarity index 62% rename from jape_rules/postprocessing-and-lexical-rules-matching/dose_qualification_rule.jape rename to jape_rules/rules-matching/dose_qualification_rule.jape index a4ddf3f..62aa91e 100644 --- a/jape_rules/postprocessing-and-lexical-rules-matching/dose_qualification_rule.jape +++ b/jape_rules/rules-matching/dose_qualification_rule.jape @@ -3,30 +3,29 @@ import static gate.Utils.*; } Phase:secondphase Input: MANIFESTATION_FINDING Token -Options: control = all -Rule: dose_rule_match +Options: control = appelt +Rule: dose_qualification_rule ( -{MANIFESTATION_FINDING} +({MANIFESTATION_FINDING} | {Token.root=="mid"}) ({Token.word=~"[Dd]ose"} | {Token.word=~"[Dd]osages"} | {Token.word=~"[Dd]ose-dependently"} ) ) -:dose_rule_match +:dose_qualification_rule --> { - gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("dose_rule_match"); + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("dose_qualification_rule"); String content = stringFor(doc, lookup.firstNode().getOffset(),lookup.lastNode().getOffset()); try{ gate.FeatureMap features = Factory.newFeatureMap(); - - //System.out.println("ACA HAY UN MANIFESTATION_FINDING Y DOSE"); - for (Annotation ann : lookup){ + //hardcoded only for mid, if not create manifestation for doses list + features.put("dose_qualification","mid"); + for (Annotation ann : lookup){ if(ann.getType().equals("MANIFESTATION_FINDING")){ - features.put("qualification",ann.getFeatures().get("text")); + features.put("dose_qualification",ann.getFeatures().get("text")); outputAS.remove(ann); } } - features.put("text",content); - features.put("rule", "dose_rule_match"); + features.put("rule", "dose_qualification_rule"); outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "DOSE_QUALIFICATION", features); }catch(InvalidOffsetException e){ throw new LuckyException(e); diff --git a/jape_rules/rules-matching/group_basic_qualifier_mapping.jape b/jape_rules/rules-matching/group_basic_qualifier_mapping.jape new file mode 100644 index 0000000..f0e6214 --- /dev/null +++ b/jape_rules/rules-matching/group_basic_qualifier_mapping.jape @@ -0,0 +1,43 @@ +Imports: { +import static gate.Utils.*; +} +Phase:secondphase +Input: Token BASIC_GROUP_QUALIFICATION +Options: control = appelt +Rule: group_basic_qualifier_mapping +( +({BASIC_GROUP_QUALIFICATION} {Token.root=~"[Gg]roup"}) + +):group_basic_qualifier_mapping +--> +{ + + + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("group_basic_qualifier_mapping"); + + gate.FeatureMap features = Factory.newFeatureMap(); + + gate.AnnotationSet group_ann = outputAS.get("GROUP",lookup.firstNode().getOffset(),lookup.lastNode().getOffset()); + if(group_ann.size()<1){ + try{ + String content = stringFor(doc, lookup.firstNode().getOffset(),lookup.lastNode().getOffset()); + features.put("text",content); + features.put("rule", "group_basic_qualifier_mapping"); + for (Annotation ann : lookup){ + if(ann.getType().equals("BASIC_GROUP_QUALIFICATION")){ + features.put("basic_qualification",ann.getFeatures().get("text")); + //outputAS.remove(ann); + } + } + + outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "GROUP", features); + + }catch(InvalidOffsetException e){ + throw new LuckyException(e); + } + } + + + + +} \ No newline at end of file diff --git a/jape_rules/rules-matching/group_dose_qualifier_mapping.jape b/jape_rules/rules-matching/group_dose_qualifier_mapping.jape new file mode 100644 index 0000000..998ad67 --- /dev/null +++ b/jape_rules/rules-matching/group_dose_qualifier_mapping.jape @@ -0,0 +1,39 @@ +Imports: { +import static gate.Utils.*; +} +Phase:secondphase +Input: Token DOSE_QUALIFICATION +Options: control = appelt +Rule: group_dose_qualifier_mapping +( +({DOSE_QUALIFICATION} {Token.root=~"[Gg]roup"}) + +):group_qualifier_mapping +--> +{ +//{ (( [{tag:/JJ.*/} ] [{tag:/JJ.*|NN.*/} ] [{ word:/group[s]*/; tag:/NN.*/}] ) ) => "GROUP" } +//{ (( [{tag:/JJ.*|NN.*/} ] [{ word:/group[s]*/; tag:/NN.*/}] ) ) => "GROUP" } + + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("group_qualifier_mapping"); + + try{ + gate.FeatureMap features = Factory.newFeatureMap(); + String content = stringFor(doc, lookup.firstNode().getOffset(),lookup.lastNode().getOffset()); + features.put("text",content); + features.put("rule", "group_dose_qualifier_mapping"); + for (Annotation ann : lookup){ + if(ann.getType().equals("DOSE_QUALIFICATION")){ + features.put("dose_qualification",ann.getFeatures().get("dose_qualification")); + //outputAS.remove(ann); + } + } + + outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "GROUP", features); + + + + }catch(InvalidOffsetException e){ + throw new LuckyException(e); + } + +} \ No newline at end of file diff --git a/jape_rules/rules-matching/group_mapping.jape b/jape_rules/rules-matching/group_mapping.jape new file mode 100644 index 0000000..dbd52f4 --- /dev/null +++ b/jape_rules/rules-matching/group_mapping.jape @@ -0,0 +1,42 @@ +Imports: { +import static gate.Utils.*; +} +Phase:secondphase +Input: Token +Options: control = appelt +Rule: group_mapping +( +{Token.root=~"[Gg]roup"} ({Token.kind=="NUMBER"} | ({Token.root=="[mdclxvi]"})[1,3] | {Token.word=="|"}) +(({Token.root == "and"} | {Token.root == "or"} | {Token.root == "to"} | {Token.word =~"[,-]"}) ({Token.kind=="NUMBER"} | ({Token.root=="[mdclxvi]"})[1,3] | {Token.word=="|"}))[0,4] +):group_mapping +--> +{ + +//{Token.root=~"[Gg]roup"} ({Token.kind=="NUMBER"} | {Token.pos=="NN"} | {Token.pos=="NNP"} | {Token.word=="|"}) +//(({Token.root == "and"} | {Token.root == "or"} | {Token.root == "to"} | {Token.word =~"[,-]"}) ({Token.kind=="NUMBER"} | {Token.pos=="NN"} | {Token.pos=="NNP"} | {Token.word=="|"}))[0,4] +//{ (([{ word:/group[s]*/; tag:/NN.*/}] ([{ner:NUMBER } | {word::IS_NUM} | {word:/^[mdclxvi]+$/}] [{ word:$DOSE_SEP }])* ([{ner:NUMBER } | {word::IS_NUM} | {word:/^[mdclxvi]+$/}]) ) ) => "GROUP" } +//ojo revisar el NN +//{ (( [{tag:/JJ.*/} ] [{tag:/JJ.*|NN.*/} ] [{ word:/group[s]*/; tag:/NN.*/}] ) ) => "GROUP" } +//{ (( [{tag:/JJ.*|NN.*/} ] [{ word:/group[s]*/; tag:/NN.*/}] ) ) => "GROUP" } + + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("group_mapping"); + try{ + gate.FeatureMap features = Factory.newFeatureMap(); + String content = stringFor(doc, lookup.firstNode().getOffset(),lookup.lastNode().getOffset()); + features.put("text",content); + features.put("rule", "group_mapping"); + + for (Annotation ann : lookup){ + if(ann.getFeatures().get("root").toString().toLowerCase().contains("group")){ + String group_id = stringFor(doc, ann.getEndNode().getOffset(),lookup.lastNode().getOffset()); + features.put("group_id", group_id.trim()); + } + } + + outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "GROUP", features); + + }catch(InvalidOffsetException e){ + throw new LuckyException(e); + } + +} \ No newline at end of file diff --git a/jape_rules/rules-matching/statistical_significance_mapping.jape b/jape_rules/rules-matching/statistical_significance_mapping.jape new file mode 100644 index 0000000..a167fc5 --- /dev/null +++ b/jape_rules/rules-matching/statistical_significance_mapping.jape @@ -0,0 +1,37 @@ +Imports: { +import static gate.Utils.*; +} +Phase:secondphase +Input: Token +Options: control = appelt +Rule: statistical_significance_mapping +( +{Token.kind=="NUMBER22222"} +) :statistical_significance_mapping +--> +{ +//STATICAL SIGNIFICANCE +//{(/p/ $STATICAL_SYMBOL ([{ner:NUMBER }])) => "STATICAL_SIGNIFICANCE" } +//{( (/p/ $STATICAL_SYMBOL ([{ner:NUMBER }])) $STATICAL_SEP_KEY (/p/ $STATICAL_SYMBOL ([{ner:NUMBER }])) ) => "STATICAL_SIGNIFICANCE" } +//{ (( [/\**/] $PARENTESIS /p/ $STATICAL_SYMBOL ([{ner:NUMBER }]) ) $PARENTESIS ) => "STATICAL_SIGNIFICANCE" } +//{ (( [/\**/] $PARENTESIS /p/ $STATICAL_SYMBOL ([{ner:NUMBER }]) ) $PARENTESIS $STATICAL_SEP_KEY* ( [/\**/] $PARENTESIS /p/ $STATICAL_SYMBOL ([{ner:NUMBER }]) ) $PARENTESIS ) => "STATICAL_SIGNIFICANCE" } +//{(([{word::IS_NUM}]) $PERCENTAJE $PARENTESIS /p/ $STATICAL_SYMBOL ([{ner:NUMBER }]) $PARENTESIS ) => "STATICAL_SIGNIFICANCE" } +//{(([{word::IS_NUM}]) $PERCENTAJE $PARENTESIS /p/ $STATICAL_SYMBOL ([{ner:NUMBER }]) $PARENTESIS $STATICAL_SEP_KEY ([{word::IS_NUM}]) $PERCENTAJE $PARENTESIS /p/ $STATICAL_SYMBOL ([{ner:NUMBER }]) $PARENTESIS ) => "STATICAL_SIGNIFICANCE" } +//{( ([{ner:NUMBER }]) /\%/ /\(/ /p/ $STATICAL_SYMBOL ([{ner:NUMBER }]) /\)/ ) => "STATICAL_SIGNIFICANCE" } + + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("statistical_significance_mapping"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + //gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + //gate.Annotation sentence = sentence_set.iterator().next(); + try{ + gate.FeatureMap features = Factory.newFeatureMap(); + String content = stringFor(doc, lookup.firstNode().getOffset(),lookup.lastNode().getOffset()); + features.put("text",content); + features.put("rule", "statistical_significance_mapping"); + outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "statistical_significance_mapping", features); + + }catch(InvalidOffsetException e){ + throw new LuckyException(e); + } + +} \ No newline at end of file -- GitLab From a39d0e00073d9e3fe3b8ad267a99c5b78ced53a4 Mon Sep 17 00:00:00 2001 From: Javi Corvi Date: Thu, 2 Dec 2021 23:16:21 +0100 Subject: [PATCH 2/6] relation extraction --- jape_rules/main.jape | 4 ++- .../create_treatment-related_findings.jape | 25 +++++++++++++++++++ .../finding_manifestation.jape | 24 ++++++++++++++++++ .../studytest_manifestation.jape | 24 ++++++++++++++++++ 4 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 jape_rules/relation-extraction/create_treatment-related_findings.jape create mode 100644 jape_rules/relation-extraction/finding_manifestation.jape create mode 100644 jape_rules/relation-extraction/studytest_manifestation.jape diff --git a/jape_rules/main.jape b/jape_rules/main.jape index c55cf7d..d16fb31 100644 --- a/jape_rules/main.jape +++ b/jape_rules/main.jape @@ -29,7 +29,6 @@ postprocessing-and-lexical-rules-matching/merge_annotations postprocessing-and-lexical-rules-matching/merge_hepatotoxicity postprocessing-and-lexical-rules-matching/merge_livermarker postprocessing-and-lexical-rules-matching/remove_study_domain - postprocessing-and-lexical-rules-matching/remove_manifestation_findings_false_positive_no_finding postprocessing-and-lexical-rules-matching/study_testcd_manifestation_finding postprocessing-and-lexical-rules-matching/study_testcd_over_finding @@ -47,4 +46,7 @@ relation-extraction/finding_group relation-extraction/finding_dose relation-extraction/finding_sex relation-extraction/finding_specimen +relation-extraction/finding_manifestation +relation-extraction/studytest_manifestation +relation-extraction/create_treatment-related_findings delete_lookups \ No newline at end of file diff --git a/jape_rules/relation-extraction/create_treatment-related_findings.jape b/jape_rules/relation-extraction/create_treatment-related_findings.jape new file mode 100644 index 0000000..02351c8 --- /dev/null +++ b/jape_rules/relation-extraction/create_treatment-related_findings.jape @@ -0,0 +1,25 @@ +//this rule generate the relations between the finding and the dose sex entities +Imports: { +import static gate.Utils.*; +import es.bsc.inb.nlp.gate.generic.component.util.*; +} +Phase:secondphase +Input: FINDING STUDY_TESTCD +Options: control = appelt +Rule: create_treatment-related_findings +( +{FINDING} | {STUDY_TESTCD} +) :create_treatment-related_findings +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("create_treatment-related_findings"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + gate.Annotation sentence = sentence_set.iterator().next(); + + //fields of the sentence + AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); + + AnnotationUtil.processFindingRelation(sentenceFields, sentence, ann, outputAS, "FINDINGS_2"); + +} \ No newline at end of file diff --git a/jape_rules/relation-extraction/finding_manifestation.jape b/jape_rules/relation-extraction/finding_manifestation.jape new file mode 100644 index 0000000..729534a --- /dev/null +++ b/jape_rules/relation-extraction/finding_manifestation.jape @@ -0,0 +1,24 @@ +Imports: { +import static gate.Utils.*; +import es.bsc.inb.nlp.gate.generic.component.util.*; +} +Phase:secondphase +Input: FINDING +Options: control = appelt +Rule: finding_manifestation +( +{FINDING} +) :finding_manifestation +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("finding_manifestation"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + gate.Annotation sentence = sentence_set.iterator().next(); + //fields of the sentence + AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); + Annotation manifestation_annotation = AnnotationUtil.getClosestAnnotation(sentenceFields, ann, "MANIFESTATION_FINDING", 10, 5, ";" ); + if(manifestation_annotation!=null){ + ann.getFeatures().put("MANIFESTATION_FINDING", manifestation_annotation.getId()); + } +} \ No newline at end of file diff --git a/jape_rules/relation-extraction/studytest_manifestation.jape b/jape_rules/relation-extraction/studytest_manifestation.jape new file mode 100644 index 0000000..ab34a6f --- /dev/null +++ b/jape_rules/relation-extraction/studytest_manifestation.jape @@ -0,0 +1,24 @@ +Imports: { +import static gate.Utils.*; +import es.bsc.inb.nlp.gate.generic.component.util.*; +} +Phase:secondphase +Input: STUDY_TESTCD +Options: control = appelt +Rule: studytest_manifestation +( +{STUDY_TESTCD} +) :studytest_manifestation +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("studytest_manifestation"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + gate.Annotation sentence = sentence_set.iterator().next(); + //fields of the sentence + AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); + Annotation manifestation_annotation = AnnotationUtil.getManifestationOfFindingAnnotation(sentenceFields, ann, ";" ); + if(manifestation_annotation!=null){ + ann.getFeatures().put("MANIFESTATION_FINDING", manifestation_annotation.getId()); + } +} \ No newline at end of file -- GitLab From 7b4294a0e3fc520802662e29819930d52a8a8519 Mon Sep 17 00:00:00 2001 From: Javi Corvi Date: Fri, 3 Dec 2021 09:56:32 +0100 Subject: [PATCH 3/6] FINDING as relation collection --- .../relation-extraction/create_treatment-related_findings.jape | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jape_rules/relation-extraction/create_treatment-related_findings.jape b/jape_rules/relation-extraction/create_treatment-related_findings.jape index 02351c8..6ba1d85 100644 --- a/jape_rules/relation-extraction/create_treatment-related_findings.jape +++ b/jape_rules/relation-extraction/create_treatment-related_findings.jape @@ -20,6 +20,6 @@ Rule: create_treatment-related_findings //fields of the sentence AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); - AnnotationUtil.processFindingRelation(sentenceFields, sentence, ann, outputAS, "FINDINGS_2"); + AnnotationUtil.processFindingRelation(sentenceFields, sentence, ann, outputAS, "FINDINGS"); } \ No newline at end of file -- GitLab From f7f9b147e7768d19b31250b9ae68c2df3ec2118e Mon Sep 17 00:00:00 2001 From: Javi Corvi Date: Fri, 3 Dec 2021 12:02:42 +0100 Subject: [PATCH 4/6] change gate generic component branch --- docker-build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-build.sh b/docker-build.sh index 0f70f3c..d021ef8 100755 --- a/docker-build.sh +++ b/docker-build.sh @@ -28,8 +28,8 @@ else fi git clone https://gitlab.bsc.es/inb/text-mining/generic-tools/nlp-gate-generic-component.git -cd nlp-gate-generic-component -git checkout gate_oncloud_version +#cd nlp-gate-generic-component +#git checkout gate_oncloud_version #move gate application mv gate_application ../ mvn clean install -DskipTests -- GitLab From 79648245286b564af32c8fd2e5e43446f5cfb98b Mon Sep 17 00:00:00 2001 From: Javi Corvi Date: Fri, 3 Dec 2021 12:02:42 +0100 Subject: [PATCH 5/6] change gate generic component branch --- docker-build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-build.sh b/docker-build.sh index 0f70f3c..f6570fe 100755 --- a/docker-build.sh +++ b/docker-build.sh @@ -29,7 +29,7 @@ fi git clone https://gitlab.bsc.es/inb/text-mining/generic-tools/nlp-gate-generic-component.git cd nlp-gate-generic-component -git checkout gate_oncloud_version +#git checkout gate_oncloud_version #move gate application mv gate_application ../ mvn clean install -DskipTests -- GitLab From 7cde0a2ae37053528621069fc8ee9f2441e2a763 Mon Sep 17 00:00:00 2001 From: Javi Corvi Date: Fri, 7 Jan 2022 12:59:35 +0100 Subject: [PATCH 6/6] sex detection closest --- jape_rules/main.jape | 2 +- .../finding_closest_sex.jape | 40 ++++++++++++ .../relation-extraction/finding_sex.jape | 62 +++++++++++++++---- jape_rules/relation-extraction/sex.jape | 30 --------- 4 files changed, 90 insertions(+), 44 deletions(-) create mode 100644 jape_rules/relation-extraction/finding_closest_sex.jape delete mode 100644 jape_rules/relation-extraction/sex.jape diff --git a/jape_rules/main.jape b/jape_rules/main.jape index d16fb31..7acc1cd 100644 --- a/jape_rules/main.jape +++ b/jape_rules/main.jape @@ -44,7 +44,7 @@ relation-extraction/finding_dose_sex relation-extraction/finding_is_treatment_related relation-extraction/finding_group relation-extraction/finding_dose -relation-extraction/finding_sex +relation-extraction/finding_closest_sex relation-extraction/finding_specimen relation-extraction/finding_manifestation relation-extraction/studytest_manifestation diff --git a/jape_rules/relation-extraction/finding_closest_sex.jape b/jape_rules/relation-extraction/finding_closest_sex.jape new file mode 100644 index 0000000..748291e --- /dev/null +++ b/jape_rules/relation-extraction/finding_closest_sex.jape @@ -0,0 +1,40 @@ +//this rule generate the relations between the finding and the dose sex entities +Imports: { +import static gate.Utils.*; +import es.bsc.inb.nlp.gate.generic.component.util.*; +} +Phase:secondphase +Input: FINDING STUDY_TESTCD +Options: control = appelt +Rule: finding_sex +( +{FINDING} | {STUDY_TESTCD} +) :finding_sex +--> +{ + gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("finding_sex"); + gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); + gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); + gate.Annotation sentence = sentence_set.iterator().next(); + + //only if there is no dose_sex and dose_sex_multi + + //fields of the sentence + AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); + //try{ + //Annotation dose_annotation = AnnotationUtil.getClosestAnnotation(sentenceFields, ann, "SEX", AnnotationUtil.right_limit, AnnotationUtil.left_limit, null); + Annotation sex_annotation = AnnotationUtil.getClosestAnnotationWithPOSPriorityRight(sentenceFields, ann, "SEX", AnnotationUtil.right_limit, null,"IN"); + if(sex_annotation!=null){ + ann.getFeatures().put("SEX", sex_annotation.getId()); + //outputAS.add(ann.getStartNode(),sex_annotation.getEndNode(), "FINDING_SEX_IN", Factory.newFeatureMap()); + }else{ + sex_annotation = AnnotationUtil.getClosestAnnotation(sentenceFields, ann, "SEX", AnnotationUtil.right_limit, AnnotationUtil.left_limit, null); + if(sex_annotation!=null){ + ann.getFeatures().put("SEX", sex_annotation.getId()); + //outputAS.add(ann.getStartNode(),sex_annotation.getEndNode(), "FINDING_SEX_CLOSEST", Factory.newFeatureMap()); + } + } + //}catch(InvalidOffsetException e){ + // throw new LuckyException(e); + //} +} \ No newline at end of file diff --git a/jape_rules/relation-extraction/finding_sex.jape b/jape_rules/relation-extraction/finding_sex.jape index e486e9c..1d0343e 100644 --- a/jape_rules/relation-extraction/finding_sex.jape +++ b/jape_rules/relation-extraction/finding_sex.jape @@ -1,29 +1,65 @@ -//this rule generate the relations between the finding and the dose sex entities +//this rule generate the relations between the finding and the sex Imports: { import static gate.Utils.*; -import es.bsc.inb.nlp.gate.generic.component.util.*; } Phase:secondphase -Input: FINDING STUDY_TESTCD -Options: control = appelt +Input: Token Sentence SEX FINDING STUDY_TESTCD +Options: control = all Rule: finding_sex ( -{FINDING} | {STUDY_TESTCD} +({FINDING} | {STUDY_TESTCD}) (({Token})[0,5] {Token.pos=="IN"} ({Token})[0,3] ({SEX})) ) :finding_sex --> { + //actualment eno en uso logica realizada en java desde finding_closest_sex + //no esta sirviendo ver el ejemplo 100. Ademas de no estar tomando bien cuando anotar ya que la logica de ver si tiene DOSE_SEX_MULTI + // y DOSE_SEX no esta funcionando me esta tomando siempre el mayor con in no el mas cercano, por lo tanto ejemplo como el: + //B lymphocites + //y termina siendo contraproducente esta regla, funciona mejor con el closest. + //Ademas tampoco esta teniendo en cuenta la sentencia. + //Osea hay que escribir una que tenga preponderancia con el IN, pero en la misma sentence y que se quede con el mas pequeño. gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("finding_sex"); gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); gate.Annotation sentence = sentence_set.iterator().next(); + gate.AnnotationSet finding_entity = lookup.get("FINDING"); + gate.AnnotationSet study_entity = lookup.get("STUDY_TESTCD"); + gate.AnnotationSet sex_entity = lookup.get("SEX"); - //only if there is no dose_sex and dose_sex_multi - - //fields of the sentence - AnnotationSet sentenceFields = outputAS.get(sentence.getStartNode().getOffset(), sentence.getEndNode().getOffset()); - - Annotation dose_annotation = AnnotationUtil.getClosestAnnotation(sentenceFields, ann, "SEX", AnnotationUtil.right_limit, AnnotationUtil.left_limit, null); - if(dose_annotation!=null){ - ann.getFeatures().put("SEX", dose_annotation.getId()); + try{ + if(finding_entity!=null && finding_entity.size()>0){ + Annotation finding = finding_entity.iterator().next(); + if((finding.getFeatures().get("DOSE_SEX_MULTI")!=null && !finding.getFeatures().get("DOSE_SEX_MULTI").toString().equals("")) || + (finding.getFeatures().get("DOSE_SEX")!=null && !finding.getFeatures().get("DOSE_SEX").toString().equals("")) + ){ + System.out.println("tiene dose sex"); + } else { + if(sex_entity!=null && sex_entity.size()>0){ + finding.getFeatures().put("FINDING_SEX", sex_entity.iterator().next().getId()); + System.out.println("add finding sex"); + } + } + + }else if(study_entity!=null && study_entity.size()>0){ + Annotation study = study_entity.iterator().next(); + if((study.getFeatures().get("DOSE_SEX_MULTI")!=null && !study.getFeatures().get("DOSE_SEX_MULTI").toString().equals("")) || + (study.getFeatures().get("DOSE_SEX")!=null && !study.getFeatures().get("DOSE_SEX").toString().equals("")) + + ){ + System.out.println("tiene dose sex"); + } else { + if(sex_entity!=null && sex_entity.size()>0){ + study.getFeatures().put("FINDING_SEX", sex_entity.iterator().next().getId()); + System.out.println("add finding sex"); + } + } + } + gate.FeatureMap features = Factory.newFeatureMap(); + String content = stringFor(doc, lookup.firstNode().getOffset(),lookup.lastNode().getOffset()); + features.put("text",content); + features.put("rule", "finding_sex"); + outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "FINDING_SEX", features); + }catch(InvalidOffsetException e){ + throw new LuckyException(e); } } \ No newline at end of file diff --git a/jape_rules/relation-extraction/sex.jape b/jape_rules/relation-extraction/sex.jape deleted file mode 100644 index 493aaa3..0000000 --- a/jape_rules/relation-extraction/sex.jape +++ /dev/null @@ -1,30 +0,0 @@ -//this rule generate the relations between the finding and the sex -Imports: { -import static gate.Utils.*; -} -Phase:secondphase -Input: Token Sentence SEX FINDING STUDY_TESTCD -Options: control = appelt -Rule: sex_rule -( -({STUDY_TESTCD} | {FINDING}) (({Token})[0,20] {Token.pos=="IN"} ({Token})[0,3] {SEX}) -) :sex_rule ---> -{ - - gate.AnnotationSet lookup = (gate.AnnotationSet) bindings.get("sex_rule"); - gate.Annotation ann = (gate.Annotation) lookup.iterator().next(); - gate.AnnotationSet sentence_set = outputAS.get("Sentence",ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); - gate.Annotation sentence = sentence_set.iterator().next(); - System.out.println("sex_rule"); - System.out.println(stringFor(doc, sentence)); - try{ - gate.FeatureMap features = Factory.newFeatureMap(); - String content = stringFor(doc, lookup.firstNode().getOffset(),lookup.lastNode().getOffset()); - features.put("text",content); - features.put("rule", "change_specimen"); - outputAS.add(lookup.firstNode().getOffset(),lookup.lastNode().getOffset(), "FINDING_SEX", features); - }catch(InvalidOffsetException e){ - throw new LuckyException(e); - } -} \ No newline at end of file -- GitLab