Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
INB
eTRANSAFE
pretox-sr-domain-identification
Commits
33a68113
Commit
33a68113
authored
Nov 02, 2021
by
Javi Corvi
Browse files
new results of pipeline
parent
ec995dc1
Pipeline
#25589
passed with stage
in 1 minute and 43 seconds
Changes
1
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/main/java/es/bsc/inb/ades/export/json/main/App.java
View file @
33a68113
...
...
@@ -39,6 +39,7 @@ import com.fasterxml.jackson.core.JsonGenerationException;
import
com.google.gson.Gson
;
import
com.google.gson.GsonBuilder
;
import
com.google.gson.JsonArray
;
import
com.google.gson.JsonElement
;
import
com.google.gson.JsonObject
;
import
gate.Annotation
;
...
...
@@ -172,8 +173,9 @@ public class App {
String
fileOutPutName
=
file
.
getName
();
File
outputAbstractFile
=
new
File
(
outputDirectoryPath
+
File
.
separator
+
fileOutPutName
.
replace
(
".xml"
,
"_documents.json"
));
File
outputAnnotationsFile
=
new
File
(
outputDirectoryPath
+
File
.
separator
+
fileOutPutName
.
replace
(
".xml"
,
"_annotations.json"
));
File
cvsOutputFile
=
new
File
(
outputDirectoryPath
+
File
.
separator
+
fileOutPutName
.
replace
(
".xml"
,
"_FINDINGS.csv"
));
processDocument
(
file
,
outputAbstractFile
,
outputAnnotationsFile
,
cvsOutputFile
,
annotationSet
,
annotationSetRelationExtraction
);
File
cvsFindingsOutputFile
=
new
File
(
outputDirectoryPath
+
File
.
separator
+
fileOutPutName
.
replace
(
".xml"
,
"_FINDINGS.csv"
));
File
cvsSRDomainOutputFile
=
new
File
(
outputDirectoryPath
+
File
.
separator
+
fileOutPutName
.
replace
(
".xml"
,
"_SRDOMAIN.csv"
));
processDocument
(
file
,
outputAbstractFile
,
outputAnnotationsFile
,
cvsFindingsOutputFile
,
cvsSRDomainOutputFile
,
annotationSet
,
annotationSetRelationExtraction
);
}
catch
(
ResourceInstantiationException
e
)
{
System
.
out
.
println
(
"App::process :: error with document "
+
file
.
getAbsolutePath
());
e
.
printStackTrace
();
...
...
@@ -204,7 +206,7 @@ public class App {
* @throws JsonGenerationException
* @throws InvalidOffsetException
*/
private
static
void
processDocument
(
File
inputFile
,
File
outputTextFile
,
File
outputAnnotationsFile
,
File
cvsOutputFile
,
String
annotationSet
,
String
annotationSetRelationExtraction
)
throws
ResourceInstantiationException
,
JsonGenerationException
,
IOException
,
InvalidOffsetException
{
private
static
void
processDocument
(
File
inputFile
,
File
outputTextFile
,
File
outputAnnotationsFile
,
File
cvs
Findings
OutputFile
,
File
cvsSRDomainOutputFile
,
String
annotationSet
,
String
annotationSetRelationExtraction
)
throws
ResourceInstantiationException
,
JsonGenerationException
,
IOException
,
InvalidOffsetException
{
gate
.
Document
doc
=
Factory
.
newDocument
(
inputFile
.
toURI
().
toURL
(),
"UTF-8"
);
Gson
gsonBuilder
=
new
GsonBuilder
().
create
();
JsonObject
annotated_document
=
new
JsonObject
();
...
...
@@ -232,11 +234,11 @@ public class App {
writer1
.
flush
();
writer1
.
close
();
writer1
=
null
;
annotated_document
=
null
;
exportCVS
(
annotated_document
,
cvsOutputFile
);
exportDataCVS
(
annotated_document
,
cvsFindingsOutputFile
,
cvsSRDomainOutputFile
);
annotated_document
=
null
;
//document text
DateFormat
dateFormat
=
new
SimpleDateFormat
(
"yyyy/MM/dd HH:mm:ss"
);
Date
date
=
new
Date
();
...
...
@@ -256,10 +258,136 @@ public class App {
}
private
static
void
exportCVS
(
JsonObject
annotated_document
,
File
cvsOutput
)
{
private
static
void
export
Data
CVS
(
JsonObject
annotated_document
,
File
cvs
FindingsOutput
,
File
cvsSRDomain
Output
)
{
StringBuffer
retStr
=
new
StringBuffer
(
""
);
retStr
.
append
(
"SENTENCE|FINDING|STUDY_TESTCD|MANIFESTATION_OF_FINDING|SPECIMEN|SEX|DOSE|GROUP|IS_TREATMENT_RELATED|STATUS"
);
retStr
.
append
(
System
.
getProperty
(
"line.separator"
));
StringBuffer
retStr2
=
new
StringBuffer
(
""
);
retStr2
.
append
(
"STUDYID|DOMAIN|SRSEQ|SRRISK|SPGRPCD|GRPLBL|SRGRPDOS|SRSEX|SRSTDY|SRSTPHSE|SROBSTDY|SRENDY|SRENPHSE|SROBENDY|SRDOMAIN|SRSPEC|SRTSTCD|SRFNDG|SRORES|SROBSV|SROBSQ|SRSEV|SRPCNT|SRSIGF|SRTRTEF|SRCOMNT|STATUS"
);
retStr2
.
append
(
System
.
getProperty
(
"line.separator"
));
try
{
createTxtFile
(
"SR-DOMAIN-RESULT.csv"
,
retStr
.
toString
());
JsonArray
sentences
=
annotated_document
.
getAsJsonArray
(
"relevant_sentences"
);
for
(
JsonElement
sentence
:
sentences
)
{
JsonObject
sentenceObj
=
sentence
.
getAsJsonObject
();
String
text_
=
sentenceObj
.
get
(
"text"
).
getAsString
();
String
inlinedText
=
text_
.
replaceAll
(
"\\R"
,
" "
);
String
text
=
inlinedText
.
replaceAll
(
"\\|"
,
","
);
JsonArray
findings
=
sentenceObj
.
getAsJsonArray
(
"findings"
);
for
(
JsonElement
finding
:
findings
)
{
JsonObject
findingObj
=
finding
.
getAsJsonObject
();
String
finding_str
=
findingObj
.
get
(
"FINDING"
)!=
null
?
findingObj
.
get
(
"FINDING"
).
getAsJsonObject
().
get
(
"text"
).
getAsString
():
""
;
String
specimen_str
=
findingObj
.
get
(
"SPECIMEN"
)!=
null
?
findingObj
.
get
(
"SPECIMEN"
).
getAsJsonObject
().
get
(
"text"
).
getAsString
():
""
;
String
study_testcd_str
=
findingObj
.
get
(
"STUDY_TESTCD"
)!=
null
?
findingObj
.
get
(
"STUDY_TESTCD"
).
getAsJsonObject
().
get
(
"text"
).
getAsString
():
""
;
String
sex_str
=
findingObj
.
get
(
"SEX"
)!=
null
?
findingObj
.
get
(
"SEX"
).
getAsJsonObject
().
get
(
"text"
).
getAsString
():
""
;
String
manifestation_str
=
findingObj
.
get
(
"MANIFESTATION_FINDING"
)!=
null
?
findingObj
.
get
(
"MANIFESTATION_FINDING"
).
getAsJsonObject
().
get
(
"text"
).
getAsString
():
""
;
String
is_treatment_related_str
=
findingObj
.
get
(
"IS_TREATMENT_RELATED"
)!=
null
?
findingObj
.
get
(
"IS_TREATMENT_RELATED"
).
getAsJsonObject
().
get
(
"value"
).
getAsString
():
""
;
String
dose_str
=
findingObj
.
get
(
"DOSE"
)!=
null
?
findingObj
.
get
(
"DOSE"
).
getAsJsonObject
().
get
(
"text"
).
getAsString
():
""
;
String
group_str
=
findingObj
.
get
(
"GROUP"
)!=
null
?
findingObj
.
get
(
"GROUP"
).
getAsJsonObject
().
get
(
"text"
).
getAsString
():
""
;
retStr
.
append
(
text
).
append
(
"|"
);
retStr
.
append
(
finding_str
).
append
(
"|"
);
retStr
.
append
(
study_testcd_str
).
append
(
"|"
);
retStr
.
append
(
manifestation_str
).
append
(
"|"
);
retStr
.
append
(
specimen_str
).
append
(
"|"
);
retStr
.
append
(
sex_str
).
append
(
"|"
);
retStr
.
append
(
dose_str
).
append
(
"|"
);
retStr
.
append
(
group_str
).
append
(
"|"
);
retStr
.
append
(
is_treatment_related_str
).
append
(
"|"
);
retStr
.
append
(
"not_curated"
).
append
(
"\n"
);
JsonArray
srDomainFindings
=
findingObj
.
getAsJsonArray
(
"srDomainFindings"
);
for
(
JsonElement
srDomainFinding
:
srDomainFindings
)
{
JsonObject
srDomainFindingObj
=
srDomainFinding
.
getAsJsonObject
();
//STUDYID -- > Study Identifier
retStr2
.
append
(
srDomainFindingObj
.
get
(
"srDomainId"
).
getAsString
());
retStr2
.
append
(
"|"
);
//DOMAIN -- > Domain Abbreviation, always SR
retStr2
.
append
(
"SR"
);
retStr2
.
append
(
"|"
);
//SRSEQ -- > Sequence number
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRSEQ"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRRISK --> Effect Level Associated with a Group/Sex, NOEL, LOEL, NOAEL, LOAEL, HNSTD, STD, MTD
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRRISK"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SPGRPCD --> Sponsor-defined Group Code, group 1 , A
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SPGRPCD"
).
getAsString
());
retStr2
.
append
(
"|"
);
//GRPLBL --> Sponsor-defined Group Name, "low dose", "mid dose"
retStr2
.
append
(
srDomainFindingObj
.
get
(
"GRPLBL"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRGRPDOS --> Group Dose Level, "20 mg/kg"
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRGRPDOS"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRSEX -- > Sex
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRSEX"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRSTDY --> Study Day of Start of Finding
retStr2
.
append
(
""
);
retStr2
.
append
(
"|"
);
//SRSTPHSE --> Study Phase of first Observation, “PRE-DOSING”, “DOSING”, “RECOVERY”
retStr2
.
append
(
""
);
retStr2
.
append
(
"|"
);
//SROBSTDY --> Start Phase Day of Observation
retStr2
.
append
(
""
);
retStr2
.
append
(
"|"
);
//SRENDY --> presumably Study Day of End of Finding (counterpart of SRSTDY) - TODO confirm; exported empty
retStr2
.
append
(
""
);
retStr2
.
append
(
"|"
);
//SRENPHSE -->Study Phase of last Observation
retStr2
.
append
(
""
);
retStr2
.
append
(
"|"
);
//SROBENDY --> presumably End Phase Day of Observation (counterpart of SROBSTDY) - TODO confirm; exported empty
retStr2
.
append
(
""
);
retStr2
.
append
(
"|"
);
//SRDOMAIN --> Domain of Finding
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRDOMAIN"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRSPEC --> Specimen of Finding
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRSPEC"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRTSTCD --> Test Short Name
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRTSTCD"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRFNDG --> Finding
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRFNDG"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRORES --> Observation (original result)
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRORES"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SROBSV --> Manifestation of Finding, “I” (Increase), “D” (Decrease), “P” (Present) or “A” (Absent)
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SROBSV"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SROBSQ --> Observation Qualifier, “R” (Reversible), “T” (Transient) or ‘none’
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SROBSQ"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRSEV --> Severity of Finding
retStr2
.
append
(
""
);
retStr2
.
append
(
"|"
);
//SRPCNT --> Scale of this Finding
retStr2
.
append
(
""
);
retStr2
.
append
(
"|"
);
//SRSIGF --> Statistical Significance
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRSIGF"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRTRTEF --> Treatment-Related
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRTRTEF"
).
getAsString
());
retStr2
.
append
(
"|"
);
//SRCOMNT --> Comment
retStr2
.
append
(
srDomainFindingObj
.
get
(
"SRCOMNT"
).
getAsString
());
retStr2
.
append
(
"|"
);
//STATUS --> Curation status of the row; always exported as "not_curated"
retStr2
.
append
(
"not_curated"
);
retStr2
.
append
(
"\n"
);
}
}
}
createTxtFile
(
cvsFindingsOutput
,
retStr
.
toString
());
createTxtFile
(
cvsSRDomainOutput
,
retStr2
.
toString
());
}
catch
(
IOException
e
)
{
// TODO Auto-generated catch block
e
.
printStackTrace
();
...
...
@@ -472,7 +600,7 @@ public class App {
}
else
{
String
finding_texto
=
finding
.
getFeatures
().
get
(
"text"
)==
null
?
""
:
finding
.
getFeatures
().
get
(
"text"
).
toString
();
if
(
finding_texto
.
equals
(
""
))
{
System
.
out
.
println
(
"text empty finding"
);
// limtox has no text
System
.
out
.
println
(
"text empty finding"
);
// This branch should not be reachable: every annotation is expected to carry the "text" feature
}
srDomainFinding
.
setSRORES
(
finding_texto
);
}
...
...
@@ -660,6 +788,7 @@ public class App {
return
srDomainFindingList
;
}
private
static
void
processRelevantSentences
(
String
annotationSet
,
String
annotationSetRelationExtraction
,
gate
.
Document
doc
,
JsonObject
annotated_document
)
{
AnnotationSet
relevant_sentences_set
=
doc
.
getAnnotations
(
annotationSet
).
get
(
"PRETOX_REL"
);
JsonArray
relevant_sentences
=
new
JsonArray
();
...
...
@@ -713,9 +842,8 @@ public class App {
* @throws FileNotFoundException
* @throws IOException
*/
private
static
void
createTxtFile
(
String
path
,
String
plainText
)
throws
FileNotFoundException
,
IOException
{
File
fout
=
new
File
(
path
);
FileOutputStream
fos
=
new
FileOutputStream
(
fout
);
private
static
void
createTxtFile
(
File
file
,
String
plainText
)
throws
FileNotFoundException
,
IOException
{
FileOutputStream
fos
=
new
FileOutputStream
(
file
);
BufferedWriter
bw
=
new
BufferedWriter
(
new
OutputStreamWriter
(
fos
,
StandardCharsets
.
UTF_8
));
bw
.
write
(
plainText
);
bw
.
flush
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment