From 19291df86028c387ecc94386556cbcbb295f5019 Mon Sep 17 00:00:00 2001 From: Javi Corvi Date: Wed, 30 Mar 2022 11:03:35 +0200 Subject: [PATCH] new version of relation extraction for finding - specimen --- .../component/util/AnnotationUtil.java | 175 +++++++++++------- 1 file changed, 106 insertions(+), 69 deletions(-) diff --git a/src/main/java/es/bsc/inb/nlp/gate/generic/component/util/AnnotationUtil.java b/src/main/java/es/bsc/inb/nlp/gate/generic/component/util/AnnotationUtil.java index 53d39ca..bb3b256 100644 --- a/src/main/java/es/bsc/inb/nlp/gate/generic/component/util/AnnotationUtil.java +++ b/src/main/java/es/bsc/inb/nlp/gate/generic/component/util/AnnotationUtil.java @@ -203,7 +203,7 @@ public class AnnotationUtil { token_between_closest = token_between.size(); } }else { - System.out.println("testear"); + //System.out.println("testear"); } } return closest; @@ -242,6 +242,7 @@ public class AnnotationUtil { if(!doc.getName().equals(aux_docName)) { finding_id = 1; aux_docName = doc.getName(); + System.out.println("processFindingRelation :: document : " + aux_docName); } finding.getFeatures().put("ANNOTATION_TYPE",finding.getType()); @@ -296,16 +297,47 @@ public class AnnotationUtil { group.getFeatures().put("ANNOTATION_TYPE",group.getType()); doc.getAnnotations(annotationSetRelationExtraction).add(group.getStartNode(), group.getEndNode(), "FINDING_"+finding_id, group.getFeatures()); } - + + List findings_id_copy = new ArrayList<>(); + findings_id_copy.add(finding_id); + if (finding.getFeatures().get("SPECIMEN")!=null && !finding.getFeatures().get("SPECIMEN").toString().equals("")) { - Annotation specimen = as.get(new Integer(finding.getFeatures().get("SPECIMEN").toString())); - specimen.getFeatures().put(template_value_name, getSendCode(specimen, gate.Utils.stringFor(doc, specimen))); - specimen.getFeatures().put(send_code_name, getOnlySENDCode(specimen, gate.Utils.stringFor(doc, specimen))); - specimen.getFeatures().put(send_codelist_name, getOnlySENDCodeList(specimen, gate.Utils.stringFor(doc, specimen))); - specimen.getFeatures().put("ANNOTATION_TYPE",specimen.getType()); - doc.getAnnotations(annotationSetRelationExtraction).add(specimen.getStartNode(), specimen.getEndNode(), "FINDING_"+finding_id, specimen.getFeatures()); + //Specimen relation process + String specimens = finding.getFeatures().get("SPECIMEN").toString(); + String[] specimens_splited = specimens.split(","); + Boolean first = true; + for (String spe_id : specimens_splited) { + if(!first) { + //if is not the first. Only enter when there is more than one specimen. + //Need to make copy of FINDING and increment finding id + //get all atributes from actual finding and replicate annotations + Integer finding_ant_id = finding_id; + finding_id = finding_id + 1; + AnnotationSet finding_atributes = doc.getAnnotations(annotationSetRelationExtraction).get("FINDING_"+finding_ant_id); + for (Annotation annotation : finding_atributes) { + if(!annotation.getFeatures().get("ANNOTATION_TYPE").toString().equals("SPECIMEN")) { + doc.getAnnotations(annotationSetRelationExtraction).add(annotation.getStartNode(), annotation.getEndNode(), "FINDING_"+finding_id, annotation.getFeatures()); + } + } + findings_id_copy.add(finding_id); + }else { + first = false; + } + //add specimen to finding + Annotation specimen = as.get(new Integer(spe_id)); + specimen.getFeatures().put(template_value_name, getSendCode(specimen, gate.Utils.stringFor(doc, specimen))); + specimen.getFeatures().put(send_code_name, getOnlySENDCode(specimen, gate.Utils.stringFor(doc, specimen))); + specimen.getFeatures().put(send_codelist_name, getOnlySENDCodeList(specimen, gate.Utils.stringFor(doc, specimen))); + specimen.getFeatures().put("ANNOTATION_TYPE",specimen.getType()); + doc.getAnnotations(annotationSetRelationExtraction).add(specimen.getStartNode(), specimen.getEndNode(), "FINDING_"+finding_id, specimen.getFeatures()); + } } + + + + + // Annotation STUDY_DOMAIN = getClosestAnnotation(doc, sentenceFields, finding, "STUDY_DOMAIN", right_limit, left_limit, null); // if(STUDY_DOMAIN!=null) { @@ -341,82 +373,87 @@ public class AnnotationUtil { // } //if there is already a finding related to a dose_sex then - if(finding.getFeatures().get("DOSE_SEX")!=null || finding.getFeatures().get("DOSE_SEX_MULTI")!=null ) { - if(finding.getFeatures().get("DOSE_SEX_MULTI")!=null && !finding.getFeatures().get("DOSE_SEX_MULTI").toString().equals("")) { - //multi analysis - Annotation dose_sex_multi = as.get(new Integer(finding.getFeatures().get("DOSE_SEX_MULTI").toString())); - AnnotationSet dose_sex_set = as.get("DOSE_SEX", dose_sex_multi.getStartNode().getOffset(), dose_sex_multi.getEndNode().getOffset()); - //por cada dosis y sexo relaciono - Boolean first =true; - for (Annotation dose_sex : dose_sex_set) { - Annotation dose = as.get("DOSE", dose_sex.getStartNode().getOffset(), dose_sex.getEndNode().getOffset()).iterator().next(); - Annotation sex = as.get("SEX", dose_sex.getStartNode().getOffset(), dose_sex.getEndNode().getOffset()).iterator().next(); - if(first) { - first = false; - }else { - //Need to make copy of FINDING and increment finding id - int finding_ant = finding_id; - finding_id = finding_id + 1; - //get all atributes from actual finding and replicate annotations - AnnotationSet finding_atributes = doc.getAnnotations(annotationSetRelationExtraction).get("FINDING_"+finding_ant); - for (Annotation annotation : finding_atributes) { - if(!annotation.getFeatures().get("ANNOTATION_TYPE").toString().equals("SEX") && !annotation.getFeatures().get("ANNOTATION_TYPE").toString().equals("DOSE")) { - doc.getAnnotations(annotationSetRelationExtraction).add(annotation.getStartNode(), annotation.getEndNode(), "FINDING_"+finding_id, annotation.getFeatures()); + for (Integer finding_id_to_process : findings_id_copy) { + //iterate over each finding in collection + if(finding.getFeatures().get("DOSE_SEX")!=null || finding.getFeatures().get("DOSE_SEX_MULTI")!=null ) { + if(finding.getFeatures().get("DOSE_SEX_MULTI")!=null && !finding.getFeatures().get("DOSE_SEX_MULTI").toString().equals("")) { + //multi analysis + Annotation dose_sex_multi = as.get(new Integer(finding.getFeatures().get("DOSE_SEX_MULTI").toString())); + AnnotationSet dose_sex_set = as.get("DOSE_SEX", dose_sex_multi.getStartNode().getOffset(), dose_sex_multi.getEndNode().getOffset()); + //por cada dosis y sexo relaciono + Boolean first =true; + for (Annotation dose_sex : dose_sex_set) { + Annotation dose = as.get("DOSE", dose_sex.getStartNode().getOffset(), dose_sex.getEndNode().getOffset()).iterator().next(); + Annotation sex = as.get("SEX", dose_sex.getStartNode().getOffset(), dose_sex.getEndNode().getOffset()).iterator().next(); + if(!first) { + //Need to make copy of FINDING and increment finding id + int finding_ant = finding_id_to_process; + finding_id = finding_id + 1; + //get all atributes from actual finding and replicate annotations + AnnotationSet finding_atributes = doc.getAnnotations(annotationSetRelationExtraction).get("FINDING_"+finding_ant); + for (Annotation annotation : finding_atributes) { + if(!annotation.getFeatures().get("ANNOTATION_TYPE").toString().equals("SEX") && !annotation.getFeatures().get("ANNOTATION_TYPE").toString().equals("DOSE")) { + doc.getAnnotations(annotationSetRelationExtraction).add(annotation.getStartNode(), annotation.getEndNode(), "FINDING_"+finding_id, annotation.getFeatures()); + } } + finding_id_to_process = finding_id; + }else { + first = false; } + dose.getFeatures().put("ANNOTATION_TYPE",dose.getType()); + dose.getFeatures().put(template_value_name, gate.Utils.stringFor(doc, dose)); + dose.getFeatures().put(send_code_name, gate.Utils.stringFor(doc, dose)); + doc.getAnnotations(annotationSetRelationExtraction).add(dose.getStartNode(), dose.getEndNode(), "FINDING_"+finding_id_to_process, dose.getFeatures()); + String send_code = getSendCode(sex, gate.Utils.stringFor(doc, sex)); + sex.getFeatures().put(template_value_name, send_code); + sex.getFeatures().put(send_code_name, getOnlySENDCode(sex, gate.Utils.stringFor(doc, sex))); + sex.getFeatures().put("ANNOTATION_TYPE",sex.getType()); + doc.getAnnotations(annotationSetRelationExtraction).add(sex.getStartNode(), sex.getEndNode(), "FINDING_"+finding_id_to_process, sex.getFeatures()); } + }else if (finding.getFeatures().get("DOSE_SEX")!=null && !finding.getFeatures().get("DOSE_SEX").toString().equals("")) { + // dose sex analysis + Annotation dose_sex = as.get(new Integer(finding.getFeatures().get("DOSE_SEX").toString())); + Annotation dose = as.get("DOSE", dose_sex.getStartNode().getOffset(), dose_sex.getEndNode().getOffset()).iterator().next(); + Annotation sex = as.get("SEX", dose_sex.getStartNode().getOffset(), dose_sex.getEndNode().getOffset()).iterator().next(); + dose.getFeatures().put("ANNOTATION_TYPE",dose.getType()); dose.getFeatures().put(template_value_name, gate.Utils.stringFor(doc, dose)); dose.getFeatures().put(send_code_name, gate.Utils.stringFor(doc, dose)); - doc.getAnnotations(annotationSetRelationExtraction).add(dose.getStartNode(), dose.getEndNode(), "FINDING_"+finding_id, dose.getFeatures()); + doc.getAnnotations(annotationSetRelationExtraction).add(dose.getStartNode(), dose.getEndNode(), "FINDING_"+finding_id_to_process, dose.getFeatures()); + String send_code = getSendCode(sex, gate.Utils.stringFor(doc, sex)); sex.getFeatures().put(template_value_name, send_code); sex.getFeatures().put(send_code_name, getOnlySENDCode(sex, gate.Utils.stringFor(doc, sex))); sex.getFeatures().put("ANNOTATION_TYPE",sex.getType()); - doc.getAnnotations(annotationSetRelationExtraction).add(sex.getStartNode(), sex.getEndNode(), "FINDING_"+finding_id, sex.getFeatures()); + doc.getAnnotations(annotationSetRelationExtraction).add(sex.getStartNode(), sex.getEndNode(), "FINDING_"+finding_id_to_process, sex.getFeatures()); } - }else if (finding.getFeatures().get("DOSE_SEX")!=null && !finding.getFeatures().get("DOSE_SEX").toString().equals("")) { - // dose sex analysis - Annotation dose_sex = as.get(new Integer(finding.getFeatures().get("DOSE_SEX").toString())); - Annotation dose = as.get("DOSE", dose_sex.getStartNode().getOffset(), dose_sex.getEndNode().getOffset()).iterator().next(); - Annotation sex = as.get("SEX", dose_sex.getStartNode().getOffset(), dose_sex.getEndNode().getOffset()).iterator().next(); - dose.getFeatures().put("ANNOTATION_TYPE",dose.getType()); - dose.getFeatures().put(template_value_name, gate.Utils.stringFor(doc, dose)); - dose.getFeatures().put(send_code_name, gate.Utils.stringFor(doc, dose)); - doc.getAnnotations(annotationSetRelationExtraction).add(dose.getStartNode(), dose.getEndNode(), "FINDING_"+finding_id, dose.getFeatures()); - String send_code = getSendCode(sex, gate.Utils.stringFor(doc, sex)); - sex.getFeatures().put(template_value_name, send_code); - sex.getFeatures().put(send_code_name, getOnlySENDCode(sex, gate.Utils.stringFor(doc, sex))); - sex.getFeatures().put("ANNOTATION_TYPE",sex.getType()); - doc.getAnnotations(annotationSetRelationExtraction).add(sex.getStartNode(), sex.getEndNode(), "FINDING_"+finding_id, sex.getFeatures()); - } - - - }else { - if (finding.getFeatures().get("DOSE")!=null && !finding.getFeatures().get("DOSE").toString().equals("")) { - Annotation dose = as.get(new Integer(finding.getFeatures().get("DOSE").toString())); - dose.getFeatures().put("ANNOTATION_TYPE",dose.getType()); - dose.getFeatures().put(template_value_name, gate.Utils.stringFor(doc, dose)); - dose.getFeatures().put(send_code_name, gate.Utils.stringFor(doc, dose)); - doc.getAnnotations(annotationSetRelationExtraction).add(dose.getStartNode(), dose.getEndNode(), "FINDING_"+finding_id, dose.getFeatures()); - }else if (finding.getFeatures().get("DOSE_QUALIFICATION")!=null && !finding.getFeatures().get("DOSE_QUALIFICATION").toString().equals("")) { - Annotation dose_qualification = as.get(new Integer(finding.getFeatures().get("DOSE_QUALIFICATION").toString())); - dose_qualification.getFeatures().put("ANNOTATION_TYPE","DOSE"); - dose_qualification.getFeatures().put(template_value_name, gate.Utils.stringFor(doc, dose_qualification)); - dose_qualification.getFeatures().put(send_code_name, gate.Utils.stringFor(doc, dose_qualification)); - doc.getAnnotations(annotationSetRelationExtraction).add(dose_qualification.getStartNode(), dose_qualification.getEndNode(), "FINDING_"+finding_id, dose_qualification.getFeatures()); - } - if (finding.getFeatures().get("SEX")!=null && !finding.getFeatures().get("SEX").toString().equals("")) { - Annotation sex = as.get(new Integer(finding.getFeatures().get("SEX").toString())); - String send_code = getSendCode(sex, gate.Utils.stringFor(doc, sex)); - sex.getFeatures().put(template_value_name, send_code); - sex.getFeatures().put(send_code_name, getOnlySENDCode(sex, gate.Utils.stringFor(doc, sex))); - sex.getFeatures().put("ANNOTATION_TYPE",sex.getType()); - doc.getAnnotations(annotationSetRelationExtraction).add(sex.getStartNode(), sex.getEndNode(), "FINDING_"+finding_id, sex.getFeatures()); + }else { + if (finding.getFeatures().get("DOSE")!=null && !finding.getFeatures().get("DOSE").toString().equals("")) { + Annotation dose = as.get(new Integer(finding.getFeatures().get("DOSE").toString())); + dose.getFeatures().put("ANNOTATION_TYPE",dose.getType()); + dose.getFeatures().put(template_value_name, gate.Utils.stringFor(doc, dose)); + dose.getFeatures().put(send_code_name, gate.Utils.stringFor(doc, dose)); + doc.getAnnotations(annotationSetRelationExtraction).add(dose.getStartNode(), dose.getEndNode(), "FINDING_"+finding_id_to_process, dose.getFeatures()); + }else if (finding.getFeatures().get("DOSE_QUALIFICATION")!=null && !finding.getFeatures().get("DOSE_QUALIFICATION").toString().equals("")) { + Annotation dose_qualification = as.get(new Integer(finding.getFeatures().get("DOSE_QUALIFICATION").toString())); + dose_qualification.getFeatures().put("ANNOTATION_TYPE","DOSE"); + dose_qualification.getFeatures().put(template_value_name, gate.Utils.stringFor(doc, dose_qualification)); + dose_qualification.getFeatures().put(send_code_name, gate.Utils.stringFor(doc, dose_qualification)); + doc.getAnnotations(annotationSetRelationExtraction).add(dose_qualification.getStartNode(), dose_qualification.getEndNode(), "FINDING_"+finding_id_to_process, dose_qualification.getFeatures()); + } + if (finding.getFeatures().get("SEX")!=null && !finding.getFeatures().get("SEX").toString().equals("")) { + Annotation sex = as.get(new Integer(finding.getFeatures().get("SEX").toString())); + String send_code = getSendCode(sex, gate.Utils.stringFor(doc, sex)); + sex.getFeatures().put(template_value_name, send_code); + sex.getFeatures().put(send_code_name, getOnlySENDCode(sex, gate.Utils.stringFor(doc, sex))); + sex.getFeatures().put("ANNOTATION_TYPE",sex.getType()); + doc.getAnnotations(annotationSetRelationExtraction).add(sex.getStartNode(), sex.getEndNode(), "FINDING_"+finding_id_to_process, sex.getFeatures()); + } } } + finding_id = finding_id + 1; -- GitLab