Commit 20e710c2 authored by javi

threads improve feature

parent 8b0e9be6
Pipeline #8552 passed with stage
in 2 minutes and 30 seconds
package es.bsc.inb.nlp.gate.generic.component.main;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.Set;
import org.apache.maven.shared.utils.io.FileUtils;
import gate.Corpus;
import gate.Factory;
import gate.FeatureMap;
import gate.Gate;
import gate.LanguageAnalyser;
import gate.ProcessingResource;
import gate.creole.ExecutionException;
import gate.creole.Plugin;
import gate.creole.ResourceInstantiationException;
import gate.creole.SerialAnalyserController;
import gate.util.GateException;
import gate.util.InvalidOffsetException;
/**
 * Worker that runs a GATE pipeline over a batch of files.
 *
 * <p>Each call to {@link #run()} builds its own {@code SerialAnalyserController},
 * optionally adds a gazetteer (when the {@code listsURL} parameter is set) and a
 * JAPE transducer (when the {@code japeMainPath} parameter is set), executes the
 * pipeline on every eligible file and writes the annotated document as GATE XML
 * into the directory named by the {@code outputDirectory} parameter.
 *
 * @author javi
 */
public class Process implements Runnable {

    /** Numeric identifier of this worker; only used in the log output. */
    int name;

    /** Candidate input files; only names ending in ".xml" or ".txt" are processed. */
    File[] files = null;

    /** Pipeline configuration, keyed by parameter name. */
    Map<String,String> parameters;

    /** File names without extension that must be skipped by this worker. */
    Set<String> processedFiles = null;

    /**
     * Creates a worker.
     *
     * @param name identifier printed in the log output
     * @param files candidate input files
     * @param processedFiles file names (without extension) to skip
     * @param parameters pipeline configuration
     */
    public Process(int name, File[] files, Set<String> processedFiles,Map<String,String> parameters) {
        this.name = name;
        this.files = files;
        this.processedFiles = processedFiles;
        this.parameters = parameters;
    }

    /**
     * Builds the pipeline, processes every eligible file and releases the
     * pipeline resources. A failure while building the pipeline is printed and
     * ends the run; a failure on a single document does not stop the batch.
     */
    @Override
    public void run() {
        SerialAnalyserController annieController = null;
        LanguageAnalyser jape = null;
        try {
            System.out.println("Thread " + name + " -- Total files to process : " + files.length);
            // create a serial analyser controller to run the pipeline with
            annieController = (SerialAnalyserController) Factory.createResource(
                    "gate.creole.SerialAnalyserController",
                    Factory.newFeatureMap(), Factory.newFeatureMap(), "ANNIE");

            if (parameters.get("listsURL") != null) {
                addGazetteer(annieController);
            }

            if (parameters.get("japeMainPath") != null) {
                jape = (LanguageAnalyser) gate.Factory.createResource("gate.creole.Transducer", gate.Utils.featureMap(
                        "grammarURL", new File(parameters.get("japeMainPath")).toURI().toURL(), "encoding", "UTF-8"));
                jape.setParameterValue("inputASName", parameters.get("inputASName"));
                jape.setParameterValue("outputASName", parameters.get("outputASName"));
                annieController.add(jape);
            }

            for (File file : files) {
                if (!shouldProcess(file)) {
                    continue;
                }
                try {
                    File outputGATEFile = new File(parameters.get("outputDirectory") + File.separator
                            + toOutputFileName(file.getName()));
                    processDocument(annieController, file, outputGATEFile);
                } catch (ResourceInstantiationException | MalformedURLException | InvalidOffsetException e) {
                    System.out.println("App::process :: error with document " + file.getAbsolutePath());
                    e.printStackTrace();
                }
            }
        } catch (MalformedURLException | GateException e1) {
            // The pipeline could not be built; nothing can be processed by this worker.
            e1.printStackTrace();
        } finally {
            // Free the pipeline resources on every exit path, not only after a clean run.
            // NOTE(review): in "flexible" mode the basic gazetteer is wrapped by the flexible
            // gazetteer and is not deleted explicitly here - confirm whether it needs to be.
            if (jape != null) {
                Factory.deleteResource(jape);
            }
            if (annieController != null) {
                Factory.deleteResource(annieController);
            }
        }
    }

    /**
     * Creates the gazetteer described by the parameters and adds it to the controller:
     * a flexible gazetteer wrapping the default one when {@code gazetter_type} is
     * "flexible", the default gazetteer otherwise.
     */
    private void addGazetteer(SerialAnalyserController controller) throws MalformedURLException, GateException {
        // Basic gazetteer parameters
        FeatureMap params = Factory.newFeatureMap();
        // File.toURL() is deprecated because it does not escape characters that are
        // illegal in URLs; go through toURI() as the JAPE grammar URL does.
        params.put("listsURL", new File(parameters.get("listsURL")).toURI().toURL());
        params.put("gazetteerFeatureSeparator", parameters.get("gazetteerFeatureSeparator"));
        params.put("caseSensitive", parameters.get("caseSensitive"));
        params.put("longestMatchOnly", parameters.get("longestMatchOnly"));
        ProcessingResource basicGazetteer = (ProcessingResource) Factory.createResource(
                "gate.creole.gazetteer.DefaultGazetteer", params);

        // Constant-first comparison: a missing "gazetter_type" selects the default
        // gazetteer instead of throwing a NullPointerException.
        if ("flexible".equals(parameters.get("gazetter_type"))) {
            controller.add(createFlexibleGazetteer(basicGazetteer));
        } else {
            basicGazetteer.setParameterValue("annotationSetName", parameters.get("outputASName"));
            controller.add(basicGazetteer);
        }
    }

    /** Registers the GATE "tools" plugin and creates a flexible gazetteer around the given basic gazetteer. */
    private ProcessingResource createFlexibleGazetteer(ProcessingResource basicGazetteer) throws GateException {
        Plugin toolsPlugin = new Plugin.Maven("uk.ac.gate.plugins", "tools", "8.6");
        Gate.getCreoleRegister().registerPlugin(toolsPlugin);

        ProcessingResource flexibleGazetteer = (ProcessingResource) Factory.createResource(
                "gate.creole.gazetteer.FlexibleGazetteer", Factory.newFeatureMap());
        flexibleGazetteer.setParameterValue("inputASName", parameters.get("inputASName"));

        String configuredFeatureNames = parameters.get("inputFeatureNames");
        ArrayList<String> featureNames;
        if (configuredFeatureNames == null) {
            System.out.println("No inputFeatureNames defined, Token.root and Token.word as default");
            featureNames = new ArrayList<String>(Arrays.asList("Token.root", "Token.word"));
        } else {
            featureNames = new ArrayList<String>(Arrays.asList(configuredFeatureNames.split(",")));
        }
        flexibleGazetteer.setParameterValue("inputFeatureNames", featureNames);

        flexibleGazetteer.setParameterValue("gazetteerInst", basicGazetteer);
        flexibleGazetteer.setParameterValue("outputASName", parameters.get("outputASName"));
        return flexibleGazetteer;
    }

    /** Returns true for ".xml"/".txt" files whose name without extension is not in {@code processedFiles}. */
    private boolean shouldProcess(File file) {
        String fileName = file.getName();
        boolean supportedExtension = fileName.endsWith(".xml") || fileName.endsWith(".txt");
        return supportedExtension && !processedFiles.contains(FileUtils.removeExtension(fileName));
    }

    /**
     * Maps an input file name to its output file name: a trailing ".txt" becomes ".xml",
     * any other name is returned unchanged. Only the extension is rewritten, so a
     * ".txt" occurring earlier in the name is left alone.
     */
    private static String toOutputFileName(String inputFileName) {
        final String textExtension = ".txt";
        if (inputFileName.endsWith(textExtension)) {
            return inputFileName.substring(0, inputFileName.length() - textExtension.length()) + ".xml";
        }
        return inputFileName;
    }

    /**
     * Executes the pipeline on one document and writes the result as GATE XML.
     *
     * <p>Every failure is reported on standard output and swallowed so that one bad
     * document does not abort the batch; the declared exceptions are kept for
     * signature compatibility.
     *
     * @param annieController pipeline to execute
     * @param inputFile document to load (read as UTF-8)
     * @param outputGATEFile destination file, overwritten if it exists
     * @throws ResourceInstantiationException
     * @throws MalformedURLException
     * @throws InvalidOffsetException
     */
    private static void processDocument(SerialAnalyserController annieController, File inputFile, File outputGATEFile) throws ResourceInstantiationException, MalformedURLException, InvalidOffsetException {
        gate.Document gateDocument = null;
        Corpus corpus = null;
        try {
            gateDocument = Factory.newDocument(inputFile.toURI().toURL(), "UTF-8");
            corpus = Factory.newCorpus("My XML Files");
            corpus.add(gateDocument);
            annieController.setCorpus(corpus);
            annieController.execute();
            // try-with-resources closes the file even when the write fails. The document
            // is loaded as UTF-8, so write it back as UTF-8 rather than in the platform
            // default charset.
            try (java.io.Writer out = new java.io.BufferedWriter(new java.io.OutputStreamWriter(
                    new FileOutputStream(outputGATEFile, false), java.nio.charset.StandardCharsets.UTF_8))) {
                out.write(gateDocument.toXml());
            }
        } catch (IOException e) {
            System.out.println("App :: processDocument :: IOException " + inputFile.getAbsolutePath());
            e.printStackTrace();
        } catch (Exception e) {
            // Deliberate catch-all: keep the batch going whatever a single document does.
            System.out.println("App :: processDocument :: Exception " + inputFile.getAbsolutePath());
            e.printStackTrace();
        } finally {
            // Release the per-document resources on every path; otherwise each failed
            // document would stay referenced for the rest of the run.
            if (corpus != null) {
                corpus.clear();
                Factory.deleteResource(corpus);
            }
            if (gateDocument != null) {
                Factory.deleteResource(gateDocument);
            }
        }
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment