Commit 566eb032 authored by javi
Browse files

Externalize the GATE gazetteer parameters gazetteerFeatureSeparator,
caseSensitive and longestMatchOnly as command-line options.
Also pass all parameters to process() in a single Map.
parent 3d526365
......@@ -6,6 +6,8 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
......@@ -34,6 +36,8 @@ import gate.util.GateException;
*
*/
public class App {
public static void main( String[] args ){
Options options = new Options();
......@@ -63,6 +67,20 @@ public class App {
iset.setRequired(false);
options.addOption(iset);
Option gazetteerFeatureSeparator = new Option("gazetteerFeatureSeparator", "gazetteerFeatureSeparator", true, "The character used to add arbitrary features to gazetteer entries. Default tab");
gazetteerFeatureSeparator.setRequired(false);
options.addOption(gazetteerFeatureSeparator);
Option caseSensitive = new Option("caseSensitive", "caseSensitive", true, "Should the gazetteer be case sensitive during matching. Default false");
caseSensitive.setRequired(false);
options.addOption(caseSensitive);
Option longestMatchOnly = new Option("longestMatchOnly", "longestMatchOnly", true, "This parameter is only relevant when the list of lookups contains proper prefixes "
+ "of other entries (e.g when both ‘Body Weight’ and ‘Body Weight loss’ are in the lists). The default behaviour (when this parameter is set to true) is to only match the longest entry, ‘Body Weight loss’ in this example. "
+ "Setting this parameter to false will cause the gazetteer to match all possible prefixes.");
longestMatchOnly.setRequired(false);
options.addOption(longestMatchOnly);
Option workdir = new Option("w", "workdir", true, "workDir directory path");
workdir.setRequired(false);
options.addOption(workdir);
......@@ -77,28 +95,53 @@ public class App {
formatter.printHelp("utility-name", options);
System.exit(1);
}
String inputFilePath = cmd.getOptionValue("input");
Map<String,String> parameters = new HashMap<String,String>();
String outputFilePath = cmd.getOptionValue("output");
String workdirPath = cmd.getOptionValue("workdir");
String annotationSet = cmd.getOptionValue("annotation_set");
String inputAnnotationSet = cmd.getOptionValue("input_annotation_set");
String listsDefinitionsPath = cmd.getOptionValue("lists_definitions");
String japeMainPath = cmd.getOptionValue("jape_main");
if (!java.nio.file.Files.isDirectory(Paths.get(inputFilePath))) {
if (!java.nio.file.Files.isDirectory(Paths.get(cmd.getOptionValue("input")))) {
System.out.println(" Please set the inputDirectoryPath ");
System.exit(1);
}
if (annotationSet==null) {
}
parameters.put("inputFilePath", cmd.getOptionValue("input"));
if (cmd.getOptionValue("annotation_set")==null) {
System.out.println("Please set the annotation set where the annotation will be included");
System.exit(1);
}
parameters.put("annotationSet", cmd.getOptionValue("annotation_set"));
if (inputAnnotationSet==null) {
if (cmd.getOptionValue("input_annotation_set")==null) {
System.out.println("The input annotation set not set, same as output is selected");
inputAnnotationSet = annotationSet;
parameters.put("inputAnnotationSet", cmd.getOptionValue("annotation_set"));
}else {
parameters.put("inputAnnotationSet", cmd.getOptionValue("input_annotation_set"));
}
if (cmd.getOptionValue("gazetteerFeatureSeparator")==null) {
parameters.put("gazetteerFeatureSeparator", "\t");
}else {
parameters.put("gazetteerFeatureSeparator", cmd.getOptionValue("gazetteerFeatureSeparator"));
}
if (cmd.getOptionValue("caseSensitive")==null) {
parameters.put("caseSensitive", "false");
}else {
parameters.put("caseSensitive", cmd.getOptionValue("caseSensitive"));
}
if (cmd.getOptionValue("longestMatchOnly")==null) {
parameters.put("longestMatchOnly", "true");
}else {
parameters.put("longestMatchOnly", cmd.getOptionValue("longestMatchOnly"));
}
if(workdirPath==null) {
workdirPath = "";
}
......@@ -136,7 +179,9 @@ public class App {
}
}
parameters.put("listsDefinitionsPath", listsDefinitionsPath);
if(japeMainPath==null) {
System.out.println("No Jape Main Rules were provided.");
}else {
......@@ -148,6 +193,9 @@ public class App {
}
}
parameters.put("japeMainPath", japeMainPath);
if(!execution) {
System.out.println("No gazzeter or Jape Rules were provided. There is nothing to do. Please review your configuration");
System.exit(1);
......@@ -156,6 +204,8 @@ public class App {
File outputDirectory = new File(outputFilePath);
if(!outputDirectory.exists())
outputDirectory.mkdirs();
parameters.put("outputDirectory", outputFilePath);
try {
Gate.init();
......@@ -166,7 +216,7 @@ public class App {
}
try {
process(inputFilePath, outputFilePath, listsDefinitionsPath, japeMainPath, inputAnnotationSet, annotationSet, workdirPath);
process(parameters);
} catch (GateException e) {
// TODO Auto-generated catch block
e.printStackTrace();
......@@ -184,11 +234,12 @@ public class App {
* @throws GateException
* @throws IOException
*/
private static void process(String inputDirectory, String outputDirectory, String listsDefinitionsPath, String japeRules, String inputAnnotationSet, String outAnnotationSet, String workdirPath) throws GateException, IOException {
private static void process(Map<String,String> parameters) throws GateException, IOException {
// private static void process(String inputDirectory, String outputDirectory, String listsDefinitionsPath, String japeRules, String inputAnnotationSet, String outAnnotationSet, String workdirPath) throws GateException, IOException {
try {
System.out.println("App :: main :: INIT PROCESS");
Corpus corpus = Factory.newCorpus("My Files");
File directory = new File(inputDirectory);
File directory = new File(parameters.get("inputFilePath"));
ExtensionFileFilter filter = new ExtensionFileFilter("Txt files", new String[]{"txt","xml"});
URL url = directory.toURL();
corpus.populate(url, filter, null, false);
......@@ -200,23 +251,24 @@ public class App {
annieController.setCorpus(corpus);
ProcessingResource pr_gazetter = null;
if(listsDefinitionsPath!=null) {
if(parameters.get("listsDefinitionsPath")!=null) {
//Gazetter parameters
FeatureMap params = Factory.newFeatureMap();
params.put("listsURL", new File(listsDefinitionsPath).toURL());
params.put("gazetteerFeatureSeparator", "\t");
params.put("caseSensitive",false);
params.put("listsURL", new File(parameters.get("listsDefinitionsPath")).toURL());
params.put("gazetteerFeatureSeparator", parameters.get("gazetteerFeatureSeparator"));
params.put("caseSensitive",parameters.get("caseSensitive"));
params.put("longestMatchOnly",parameters.get("longestMatchOnly"));
pr_gazetter = (ProcessingResource) Factory.createResource("gate.creole.gazetteer.DefaultGazetteer", params);
pr_gazetter.setParameterValue("annotationSetName", outAnnotationSet);
pr_gazetter.setParameterValue("annotationSetName", parameters.get("annotationSet"));
annieController.add(pr_gazetter);
}
LanguageAnalyser jape = null;
if(japeRules!=null) {
if(parameters.get("japeMainPath")!=null) {
jape = (LanguageAnalyser)gate.Factory.createResource("gate.creole.Transducer", gate.Utils.featureMap(
"grammarURL", new File(japeRules).toURI().toURL(),"encoding", "UTF-8"));
jape.setParameterValue("inputASName", inputAnnotationSet);
jape.setParameterValue("outputASName", outAnnotationSet);
"grammarURL", new File(parameters.get("japeMainPath")).toURI().toURL(),"encoding", "UTF-8"));
jape.setParameterValue("inputASName", parameters.get("inputAnnotationSet"));
jape.setParameterValue("outputASName", parameters.get("annotationSet"));
annieController.add(jape);
}
......@@ -242,7 +294,7 @@ public class App {
}else {
nameOutput = document.getName().substring(0, document.getName().indexOf(".xml")+4);
}
java.io.Writer out = new java.io.BufferedWriter(new java.io.OutputStreamWriter(new FileOutputStream(new File(outputDirectory + File.separator + nameOutput), false)));
java.io.Writer out = new java.io.BufferedWriter(new java.io.OutputStreamWriter(new FileOutputStream(new File(parameters.get("outputDirectory") + File.separator + nameOutput), false)));
out.write(document.toXml());
out.close();
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment