From 60844997ffd403f715078e686dcd110e6fb34a24 Mon Sep 17 00:00:00 2001 From: jcorvi Date: Tue, 8 Feb 2022 15:07:15 +0100 Subject: [PATCH 1/5] add execution information --- pom.xml | 5 ++ .../es/bsc/inb/importjson/mongo/main/App.java | 48 ++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 7b046ad..c718fb1 100644 --- a/pom.xml +++ b/pom.xml @@ -32,6 +32,11 @@ mongo-java-driver 3.10.2 + + org.json + json + 20211205 + diff --git a/src/main/java/es/bsc/inb/importjson/mongo/main/App.java b/src/main/java/es/bsc/inb/importjson/mongo/main/App.java index abac2bf..d446ff7 100644 --- a/src/main/java/es/bsc/inb/importjson/mongo/main/App.java +++ b/src/main/java/es/bsc/inb/importjson/mongo/main/App.java @@ -3,6 +3,8 @@ package es.bsc.inb.importjson.mongo.main; import java.io.File; import java.io.IOException; import java.nio.file.Paths; +import java.text.SimpleDateFormat; +import java.util.Date; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -13,6 +15,7 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.io.FileUtils; import org.bson.Document; +import org.json.JSONObject; import com.mongodb.MongoClient; import com.mongodb.MongoClientURI; @@ -106,7 +109,8 @@ public class App { try { MongoClient mongoClient = new MongoClient(uri); MongoDatabase db = mongoClient.getDatabase(mongoDatabaseStr); - process(inputFilePath, workdirPath, db, collection_prefixStr); + Integer total_processed = process(inputFilePath, workdirPath, db, collection_prefixStr); + addExecution(db,collection_prefixStr, total_processed); mongoClient.close(); }catch(Exception e) { System.out.println("ERROR: App "); @@ -120,8 +124,9 @@ public class App { * @param properties_parameters_path * @throws IOException */ - public static void process(String inputDirectoryPath, String workdir, MongoDatabase mongoDB, String collection_prefixStr) throws IOException { + public static Integer process(String inputDirectoryPath, String workdir, MongoDatabase mongoDB, String collection_prefixStr) throws IOException { System.out.println("App::processTagger :: INIT "); + Integer total_processed = 0; if (java.nio.file.Files.isDirectory(Paths.get(inputDirectoryPath))) { File inputDirectory = new File(inputDirectoryPath); String collection = "documents"; @@ -141,6 +146,7 @@ public class App { collection = collection_prefixStr + "_" + collection; } processDocument(file, mongoDB, collection); + total_processed = total_processed + 1; } catch (MongoTimeoutException e) { System.out.println("App::process :: MongoTimeoutException ERROR " + file.getAbsolutePath()); System.out.println("App::process :: Please review the connection "); @@ -160,7 +166,10 @@ public class App { }else { System.out.println("App::process :: No directory : " + inputDirectoryPath); } + total_processed = total_processed / 2; + System.out.println("App::process :: TOTAL FILES PROCESSED : " + total_processed ); System.out.println("App::process :: END "); + return total_processed; } /** @@ -183,4 +192,39 @@ public class App { coll=null; } + + /** + * Execute process in a document + * @param inputFile + * @param outputGATEFile + * @throws ResourceInstantiationException + * @throws IOException + * @throws JsonGenerationException + * @throws InvalidOffsetException + */ + private static void addExecution(MongoDatabase mongoDB, String collection_prefixStr, Integer total_processed) throws IOException{ + String collection = "execution"; + if(collection_prefixStr!=null && !collection_prefixStr.equals("")) { + collection = collection_prefixStr + "_" + collection; + } + Date date = new Date(); + SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + String strDate= formatter.format(date); + MongoCollection coll = mongoDB.getCollection(collection); + String jsonString = new JSONObject() + .put("date", strDate) + .put("comment", "DEBBIE Pipeline execution") + .put("abstracts_processed", total_processed) + .toString(); + Document doc = Document.parse(jsonString); + coll.insertOne(doc); + jsonString=null; + doc.clear(); + doc=null; + coll=null; + + } + + + } -- GitLab From 8f64220ef2afbd2d76c6a44296ccf5973d75e2b6 Mon Sep 17 00:00:00 2001 From: jcorvi Date: Tue, 8 Feb 2022 15:18:50 +0100 Subject: [PATCH 2/5] gitlab pipeline --- .gitlab-ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index adaee4e..414b52b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -5,8 +5,6 @@ stages: - build build_docker_image: stage: build - only: - - tags script: - docker login -u gitlab-ci-token -p $CI_BUILD_TOKEN $DOCKER_REGISTRY - docker build -t $IMAGE_FULL_PATH . -- GitLab From 788bd0be9c0fc85d14449013ecfcbfa89414b75f Mon Sep 17 00:00:00 2001 From: Javi Corvi Date: Mon, 29 Aug 2022 16:31:10 +0200 Subject: [PATCH 3/5] pretox import json to mongo --- .../es/bsc/inb/importjson/mongo/main/App.java | 112 +++++++++++++----- 1 file changed, 83 insertions(+), 29 deletions(-) diff --git a/src/main/java/es/bsc/inb/importjson/mongo/main/App.java b/src/main/java/es/bsc/inb/importjson/mongo/main/App.java index d446ff7..34ce4ce 100644 --- a/src/main/java/es/bsc/inb/importjson/mongo/main/App.java +++ b/src/main/java/es/bsc/inb/importjson/mongo/main/App.java @@ -15,19 +15,26 @@ import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.io.FileUtils; import org.bson.Document; +import org.bson.conversions.Bson; import org.json.JSONObject; +import com.mongodb.BasicDBObject; import com.mongodb.MongoClient; import com.mongodb.MongoClientURI; import com.mongodb.MongoCommandException; +import com.mongodb.MongoException; import com.mongodb.MongoTimeoutException; import com.mongodb.client.MongoCollection; import com.mongodb.client.MongoDatabase; +import com.mongodb.client.model.UpdateOptions; +import com.mongodb.client.model.Updates; +import com.mongodb.client.result.UpdateResult; +import static com.mongodb.client.model.Filters.eq; /** - * Import JSON files to Mongo DataBase. + * Import pretox results files to Mongo DataBase. * * @author javicorvi * @@ -54,11 +61,6 @@ public class App { mongoDatabase.setRequired(true); options.addOption(mongoDatabase); - Option collection = new Option("j", "collection_prefix", true, "Collection prefix, prefix to add over the collection name. Files are add to collection given the name fileid_collectionname.json. The json file will be added in collectionname. If a prefix is indicated will be added in collection_prex_collectionname." - + "Example: 1123232_abstract will be added in abstract collection, if a collection_prefix (dev) is included, the file will be added to dev_abstract"); - collection.setRequired(false); - options.addOption(collection); - CommandLineParser parser = new DefaultParser(); HelpFormatter formatter = new HelpFormatter(); CommandLine cmd = null; @@ -72,7 +74,6 @@ public class App { String inputFilePath = cmd.getOptionValue("input"); String workdirPath = cmd.getOptionValue("workdir"); - String collection_prefixStr = cmd.getOptionValue("collection_prefix"); String mongoClientUriStr = cmd.getOptionValue("mongoClientUri"); String mongoDatabaseStr = cmd.getOptionValue("mongoDatabase"); @@ -100,17 +101,10 @@ public class App { System.exit(1); } - if(collection_prefixStr==null) { - System.out.println("No collection prefix, default will be used filename_collectionname.json"); - //System.exit(1); - collection_prefixStr=""; - } - try { MongoClient mongoClient = new MongoClient(uri); MongoDatabase db = mongoClient.getDatabase(mongoDatabaseStr); - Integer total_processed = process(inputFilePath, workdirPath, db, collection_prefixStr); - addExecution(db,collection_prefixStr, total_processed); + Integer total_processed = process(inputFilePath, workdirPath, db); mongoClient.close(); }catch(Exception e) { System.out.println("ERROR: App "); @@ -124,28 +118,31 @@ public class App { * @param properties_parameters_path * @throws IOException */ - public static Integer process(String inputDirectoryPath, String workdir, MongoDatabase mongoDB, String collection_prefixStr) throws IOException { + public static Integer process(String inputDirectoryPath, String workdir, MongoDatabase mongoDB) throws IOException { System.out.println("App::processTagger :: INIT "); Integer total_processed = 0; if (java.nio.file.Files.isDirectory(Paths.get(inputDirectoryPath))) { File inputDirectory = new File(inputDirectoryPath); - String collection = "documents"; + String collection = ""; File[] files = inputDirectory.listFiles(); for (File file : files) { if(file.getName().endsWith(".json")){ try { System.out.println("App::process :: document: " + file); - int i = file.getName().lastIndexOf("_"); - if(i!=-1) { - collection = file.getName().substring(i); - collection = collection.replace("_", ""); - collection = collection.replace(".json", ""); - } - if(collection_prefixStr!=null && !collection_prefixStr.equals("")) { - collection = collection_prefixStr + "_" + collection; + collection = file.getName().substring(i); + collection = collection.replace("_", ""); + collection = collection.replace(".json", ""); + String report_section_name = file.getName().substring(0,i); + String section_name = file.getName().substring(report_section_name.lastIndexOf("_")+1,i); + String report_name = file.getName().substring(0,report_section_name.lastIndexOf("_")); + if(collection.equals("sections")) { + processSection(file, mongoDB, collection, report_name, section_name); + }else if(collection.equals("annotations")) { + processAnnotation(file, mongoDB, collection, report_name); + }else { + System.out.println("App::process :: JSON document, but not annotations or sections"); } - processDocument(file, mongoDB, collection); total_processed = total_processed + 1; } catch (MongoTimeoutException e) { System.out.println("App::process :: MongoTimeoutException ERROR " + file.getAbsolutePath()); @@ -155,11 +152,11 @@ public class App { System.out.println("App::process :: Command Exception with document " + file.getAbsolutePath()); System.out.println("App::process :: Please review if the user has write permissions "); e.printStackTrace(); - //System.exit(1); + System.exit(1); } catch (Exception e) { System.out.println("App::process :: Error with document " + file.getAbsolutePath()); e.printStackTrace(); - //System.exit(1); + System.exit(1); } } } @@ -171,6 +168,30 @@ public class App { System.out.println("App::process :: END "); return total_processed; } + + /** + * add section to the report in the mongo database + * @param mongoDB + * @param collection + */ + private static void addSectionToReport(MongoDatabase mongoDB, String collection, Document section, String reportName) { + BasicDBObject section_ref = new BasicDBObject(); + section_ref.put("$ref", "sections"); + section_ref.put("$id", section.get("_id")); + Document query = new Document().append("fileName", reportName+".pdf").append("status", "RUNNING"); + Bson updates = Updates.combine( + Updates.addToSet("sections", section_ref), + Updates.currentTimestamp("lastUpdated")); + UpdateOptions options = new UpdateOptions().upsert(false); + MongoCollection reports = mongoDB.getCollection("reports"); + try { + UpdateResult result = reports.updateOne(query, updates, options); + System.out.println("Modified document count: " + result.getModifiedCount()); + System.out.println("Upserted id: " + result.getUpsertedId()); // only contains a value when an upsert is performed + } catch (MongoException me) { + System.err.println("Unable to update due to an error: " + me); + } + } /** * Execute process in a document @@ -181,15 +202,48 @@ public class App { * @throws JsonGenerationException * @throws InvalidOffsetException */ - private static void processDocument(File inputFile, MongoDatabase mongoDB, String collection) throws IOException{ + private static void processAnnotation(File inputFile, MongoDatabase mongoDB, String collection, String reportName) throws IOException{ + try { MongoCollection coll = mongoDB.getCollection(collection); String jsonString = FileUtils.readFileToString(inputFile, "UTF-8"); Document doc = Document.parse(jsonString); + doc.append("_id", doc.get("id")); coll.insertOne(doc); jsonString=null; doc.clear(); doc=null; coll=null; + }catch(Exception e) { + System.out.println(e); + } + } + + /** + * Execute process in a document + * @param inputFile + * @param outputGATEFile + * @throws ResourceInstantiationException + * @throws IOException + * @throws JsonGenerationException + * @throws InvalidOffsetException + */ + private static void processSection(File inputFile, MongoDatabase mongoDB, String collection, String reportName, String sectionName) throws IOException{ + try { + MongoCollection coll = mongoDB.getCollection(collection); + String jsonString = FileUtils.readFileToString(inputFile, "UTF-8"); + Document doc = Document.parse(jsonString); + doc.append("_id", doc.get("id")); + doc.append("name", sectionName); + coll.insertOne(doc); + addSectionToReport(mongoDB, collection, doc, reportName); + jsonString=null; + doc.clear(); + doc=null; + coll=null; + }catch(Exception e) { + System.out.println(e); + } + } -- GitLab From 3a424962da5651a1f2aaddd056ba27ad3280f61a Mon Sep 17 00:00:00 2001 From: jcorvi Date: Mon, 29 Aug 2022 16:54:58 +0200 Subject: [PATCH 4/5] Update App.java --- src/main/java/es/bsc/inb/importjson/mongo/main/App.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/es/bsc/inb/importjson/mongo/main/App.java b/src/main/java/es/bsc/inb/importjson/mongo/main/App.java index 34ce4ce..1da9855 100644 --- a/src/main/java/es/bsc/inb/importjson/mongo/main/App.java +++ b/src/main/java/es/bsc/inb/importjson/mongo/main/App.java @@ -178,7 +178,7 @@ public class App { BasicDBObject section_ref = new BasicDBObject(); section_ref.put("$ref", "sections"); section_ref.put("$id", section.get("_id")); - Document query = new Document().append("fileName", reportName+".pdf").append("status", "RUNNING"); + Document query = new Document().append("name",reportName).append("status", "RUNNING"); Bson updates = Updates.combine( Updates.addToSet("sections", section_ref), Updates.currentTimestamp("lastUpdated")); -- GitLab From b1747afdf5c44dd7b780510db5a91eabbb665a1c Mon Sep 17 00:00:00 2001 From: Javi Corvi Date: Tue, 30 Aug 2022 13:58:24 +0200 Subject: [PATCH 5/5] pom artifact name --- pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index c718fb1..92b639a 100644 --- a/pom.xml +++ b/pom.xml @@ -3,11 +3,11 @@ 4.0.0 es.bsc.inb.nlp - import-json-to-mongo + pretox-import-pipeline-results-to-mongo 0.0.1-SNAPSHOT jar - ades_tagger + pretox-import-pipeline-results-to-mongo http://maven.apache.org -- GitLab