Commit 9356167a authored by jcorvi's avatar jcorvi
Browse files

dictionaries in zip files

parent 950f69e7
......@@ -2,4 +2,9 @@
## Version 1.0, 2020-03-03
First version of the component.
\ No newline at end of file
First version of the component.
## Version 1.1, 2020-03-10
Possibility of adding a .zip file in the dictionary definition.
Parameter -l ---> Dictionary list definitions. You can provide either a tab-separated, GATE-formatted lists.def file or a zip file that contains the dictionary/gazetteer files, including the lists.def file.
\ No newline at end of file
......@@ -17,7 +17,7 @@ https://gate.ac.uk/sale/thakker-jape-tutorial/GATE%20JAPE%20manual.pdf
This library is very useful if you need to execute gazetteer lookups and JAPE rules in batch mode, for example inside a Nextflow pipeline.
## Actual Version: 1.0, 2020-03-04
## Current Version: 1.1, 2020-03-04
## [Changelog](https://gitlab.bsc.es/inb/text-mining/generic-tools/import-json-to-mongo/blob/master/CHANGELOG)
## Docker
......@@ -43,7 +43,7 @@ Parameters:
-ia Input Annotation Set. Set this parameter if you want to provide a different input annotation set. By default, the -a output annotation set is used as input.
</p>
<p>
-l list definition of the dictionary in GATE format.
-l Dictionary list definitions. You can provide either a tab-separated, GATE-formatted lists.def file or a zip file that contains the dictionary/gazetteer files, including the lists.def file.
</p>
<p>
-j main.jape path with the JAPE rules to be executed.
......
package es.bsc.inb.nlp.gate.generic.component.main;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.nio.file.Paths;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
......@@ -43,7 +46,8 @@ public class App {
output.setRequired(true);
options.addOption(output);
Option listDefinitions = new Option("l", "lists_definitions", true, "Dictionary List definitions, Gate format.");
Option listDefinitions = new Option("l", "lists_definitions", true, "Dictionary List definitions. "
+ "A lists.def Gate-formatted file separated by tab can be provided or a zip file that contains the dictionary/gazetteer files including the lists.def ");
listDefinitions.setRequired(false);
options.addOption(listDefinitions);
......@@ -106,13 +110,34 @@ public class App {
System.out.println("No dictionary was provided.");
}else {
listsDefinitionsPath = workdirPath+listsDefinitionsPath;
System.out.println("Dictionary Path " + listsDefinitionsPath);
execution = true;
if (!java.nio.file.Files.isRegularFile(Paths.get(listsDefinitionsPath))) {
System.out.println("Please set a correct path to the list of dictionaries to annotate");
System.out.println("Please set the list of dictionaries to annotate. You can provide the list.def file or a zip file. Please if you provided a zip file remember that it must contain a list.def file inside");
System.exit(1);
}
if(listsDefinitionsPath.endsWith(".zip")) {
try {
File file = new File(listsDefinitionsPath);
String dictionaryFolderPath = file.getName().substring(0, file.getName().indexOf(".zip"));
unZipIt(listsDefinitionsPath, workdirPath + dictionaryFolderPath );
listsDefinitionsPath = workdirPath + dictionaryFolderPath + File.separator + "lists.def";
if (!java.nio.file.Files.isRegularFile(Paths.get(listsDefinitionsPath))) {
System.out.println("Please if you provided a zip file remember that it must contain a list.def file inside.");
System.exit(1);
}
}catch(Exception e) {
System.out.println("Error unziping directory, please if you provided a zip file remember that it must contain a list.def file inside. ");
System.exit(1);
}
}else if(listsDefinitionsPath.endsWith(".def")) {
System.out.println(" Please set the list of dictionaries to annotate. No list.def file or .zip file provided.");
System.exit(1);
}
}
if(japeMainPath==null) {
System.out.println("No Jape Main Rules were provided.");
}else {
......@@ -231,4 +256,50 @@ public class App {
}
/**
 * Extracts every entry of a zip archive into the given output folder.
 *
 * <p>The output folder (including missing parent folders) is created when it does not exist.
 * Directory entries are recreated as directories, and file entries are written below the
 * output folder. Entries whose resolved path would fall outside the output folder are
 * rejected, which aborts the extraction.
 *
 * <p>I/O failures are not propagated: the stack trace is printed and the method returns,
 * possibly leaving a partially extracted folder. An archive without any entry prints an
 * error message and terminates the JVM with exit code 1.
 *
 * @param zipFile path of the zip archive to extract
 * @param outputFolder path of the folder that receives the extracted content
 */
private static void unZipIt(String zipFile, String outputFolder){
    byte[] buffer = new byte[1024];
    //create output directory (and any missing parents) if it does not exist
    File folder = new File(outputFolder);
    if(!folder.exists()){
        folder.mkdirs();
    }
    // try-with-resources guarantees the archive stream is closed even if extraction fails midway
    try (ZipInputStream zis = new ZipInputStream(new FileInputStream(zipFile))) {
        // canonical form of the target folder, used to detect entries escaping it ("../" names)
        String rootPath = folder.getCanonicalPath();
        //get the first zipped entry
        ZipEntry ze = zis.getNextEntry();
        if(ze==null) {
            System.out.println("Error unziping file, please review if you zip file provided is not corrupt file remember that it must contain a list.def file inside.");
            System.exit(1);
        }
        while(ze!=null){
            File newFile = new File(folder, ze.getName());
            String entryPath = newFile.getCanonicalPath();
            // refuse entries that resolve outside of the output folder (Zip Slip)
            if(!entryPath.equals(rootPath) && !entryPath.startsWith(rootPath + File.separator)) {
                throw new IOException("Zip entry is outside of the output folder: " + ze.getName());
            }
            if(ze.isDirectory()) {
                // explicit directory entries must become directories, not empty files
                newFile.mkdirs();
            }else {
                System.out.println("file unzip : "+ newFile.getAbsoluteFile());
                //create all missing parent folders,
                //otherwise FileNotFoundException is raised for files inside compressed folders
                File parent = newFile.getParentFile();
                if(parent!=null) {
                    parent.mkdirs();
                }
                try (FileOutputStream fos = new FileOutputStream(newFile)) {
                    int len;
                    while ((len = zis.read(buffer)) > 0) {
                        fos.write(buffer, 0, len);
                    }
                }
            }
            ze = zis.getNextEntry();
        }
        System.out.println("Done");
    }catch(IOException ex){
        ex.printStackTrace();
    }
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment