Commit 88e862c3 authored by dgasull

Adding Kafka to dataClay dockers

parent f1efa9b5
# Client image for the Elastic demo. While the image is being built it waits
# for dataClay, then registers an account, a data contract and the Python
# model found in ./src by calling dataclaycmd.
FROM bscdataclay/client:2.1
# key=value form: the space-separated "LABEL key value" syntax is deprecated.
LABEL maintainer="dataClay team <support-dataclay@bsc.es>"
# Prepare environment
ENV ELASTIC_HOME=/elastic
WORKDIR ${ELASTIC_HOME}
# Configuration file locations, all below ${ELASTIC_HOME}/cfgfiles.
ENV DATACLAYCLIENTCONFIG=${ELASTIC_HOME}/cfgfiles/client.properties
ENV DATACLAYGLOBALCONFIG=${ELASTIC_HOME}/cfgfiles/global.properties
ENV DATACLAYSESSIONCONFIG=${ELASTIC_HOME}/cfgfiles/session.properties
# Names and credentials passed to the dataclaycmd registration steps below.
# NOTE(review): PASS is stored in the image environment and history - confirm
# this demo credential is never reused outside the demo.
ENV NAMESPACE=ElasticNS
ENV USER=ElasticUser
ENV PASS=ElasticPass
ENV DATASET=ElasticDS
ENV MODELBINPATH=${ELASTIC_HOME}/src
# To run the demo registration again, pass a different CACHEBUST value
# (e.g. --build-arg CACHEBUST=$(date +%s)): a changed ARG value causes a cache
# miss for the RUN instructions that follow it.
ARG CACHEBUST=1
# Copy files
COPY ./src ${ELASTIC_HOME}/src
COPY ./cfgfiles ${ELASTIC_HOME}/cfgfiles
# DATACLAY_LOG_CONFIG is not defined in this file; presumably it is provided
# by the base image - TODO confirm.
RUN cp ${ELASTIC_HOME}/cfgfiles/log4j2.xml ${DATACLAY_LOG_CONFIG}
# Wait for dataclay to be alive (max retries 10 and 5 seconds per retry)
RUN dataclaycmd WaitForDataClayToBeAlive 10 5
# Register account
RUN dataclaycmd NewAccount ${USER} ${PASS}
# Register datacontract
RUN dataclaycmd NewDataContract ${USER} ${PASS} ${DATASET} ${USER}
# Register model
RUN dataclaycmd NewModel ${USER} ${PASS} ${NAMESPACE} ${MODELBINPATH} python
# Run
# NOTE(review): this exec-form entrypoint does not name a real executable, so
# starting a container from this image is expected to fail. Presumably
# intentional because all work happens at build time - confirm.
ENTRYPOINT ["Nothing to do here"]
This diff is collapsed.
#!/bin/bash
# Absolute path of the directory that contains this script, resolved from
# BASH_SOURCE so it does not depend on the caller's working directory.
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
#
## Build and start dataClay
#pushd $SCRIPTDIR/dataclay
#docker-compose kill
#docker-compose down -v #sanity check
#docker-compose up -d
#popd
#
## BUILD ####
#pushd $SCRIPTDIR
#docker build --network=dataclay_default \
# --build-arg CACHEBUST=$(date +%s) \
# -t bscdataclay/client:2.1-elastic .
#popd
#
#echo " ===== Retrieving execution classes into $SCRIPTDIR/deploy ====="
## Copy execClasses from docker
#rm -rf $SCRIPTDIR/deploy
#rm -rf $SCRIPTDIR/execClasses
#mkdir -p $SCRIPTDIR/deploy
#mkdir -p $SCRIPTDIR/execClasses
#docker cp dataclay_dspython_1:/home/dataclayusr/dataclay/deploy/ $SCRIPTDIR
#docker cp dataclay_dsjava_1:/home/dataclayusr/dataclay/execClasses/ $SCRIPTDIR
#
#echo " ===== Retrieving SQLITE LM into $SCRIPTDIR/LM.sqlite ====="
#rm -f $SCRIPTDIR/LM.sqlite
#TABLES="account credential contract interface ifaceincontract opimplementations datacontract dataset accessedimpl accessedprop type java_type python_type memoryfeature cpufeature langfeature archfeature prefetchinginfo implementation python_implementation java_implementation annotation property java_property python_property operation java_operation python_operation metaclass java_metaclass python_metaclass namespace"
#for table in $TABLES;
#do
# docker exec -t dataclay_logicmodule_1 sqlite3 "//dataclay/storage/LM" ".dump $table" >> $SCRIPTDIR/LM.sqlite
#done
#
#echo " ===== Stopping dataClay ====="
#pushd $SCRIPTDIR/dataclay
#docker-compose -f docker-compose.yml down
#popd
# Build the three Elastic images using the Dockerfiles located next to this
# script. SCRIPTDIR is quoted so paths containing spaces work, and the script
# stops if the directory cannot be entered instead of building from whatever
# directory the caller happened to be in.
pushd "$SCRIPTDIR" || exit 1
echo " ===== Building docker bscdataclay/logicmodule-elastic ====="
docker build -f elastic.logicmodule.Dockerfile -t bscdataclay/logicmodule-elastic .
echo " ===== Building docker bscdataclay/dsjava-elastic ====="
docker build -f elastic.dsjava.Dockerfile -t bscdataclay/dsjava-elastic .
echo " ===== Building docker bscdataclay/dspython-elastic ====="
docker build -f elastic.dspython.Dockerfile -t bscdataclay/dspython-elastic .
popd
HOST=logicmodule
TCPPORT=11034
CHECK_LOG4J_DEBUG=false
<?xml version="1.0" encoding="UTF-8"?>
<!--
Log4j2 configuration.
* monitorInterval="60": Log4j2 checks this file for changes at most every 60 seconds.
* status="off": Log4j2's own internal status messages are disabled.
* All output goes through ConsoleAppender, which writes to standard error.
* Every named logger is "off" except DataService, LogicModule and Paraver ("info");
  the root logger is "off" as well.
-->
<Configuration monitorInterval="60" status="off">
<Appenders>
<Console name="ConsoleAppender" target="SYSTEM_ERR">
<!-- Pattern: ISO8601 timestamp, level, logger name, thread, class:line, message -->
<PatternLayout pattern="%d{ISO8601} %p [%c] [%t] [%C{1}:%L] %m%n"></PatternLayout>
</Console>
</Appenders>
<Loggers>
<!-- Runtime -->
<Logger name="ClientRuntime" level="off" />
<Logger name="ClientManagementLib" level="off"/>
<Logger name="DataClayRuntime" level="off"/>
<Logger name="DataServiceRuntime" level="off"/>
<Logger name="DataClayObjectLoader" level="off"/>
<Logger name="DataClayObject" level="off" /> <!-- This is very verbose! -->
<!-- Data service -->
<Logger name="DataService" level="info"/>
<Logger name="ExecutionEnvironmentSrv" level="off"/>
<!-- Lockers -->
<Logger name="dataclay.util.classloaders.SyncClass" level="off"/>
<Logger name="dataclay.heap.LockerPool" level="off"/>
<Logger name="LockerPool" level="off"/>
<Logger name="dataclay.util.classloaders.ClassLockers" level="off"/>
<!-- Garbage collection -->
<Logger name="GlobalGC" level="off"/>
<Logger name="heap.HeapManager" level="off"/>
<Logger name="ReferenceCounting" level="off"/>
<Logger name="StorageLocation" level="off"/>
<!-- Logic module -->
<Logger name="LogicModule" level="info"/>
<Logger name="LMDB" level="off"/>
<Logger name="managers" level="off" />
<Logger name="MetaDataService.db" level="off" />
<Logger name="MetaDataService" level="off" />
<!-- Communication -->
<Logger name="io.grpc" level="off"/>
<Logger name="io.netty" level="off"/>
<Logger name="NettyClientHandler" level="off"/>
<Logger name="grpc.client" level="off"/>
<Logger name="communication.LogicModule.service" level="off"/>
<Logger name="grpc.client.logicmodule" level="off"/>
<Logger name="grpc.client.dataservice.DS1" level="off"/>
<!-- Databases -->
<Logger name="dataclay.dbhandler" level="off"/>
<Logger name="dbhandler.PostgresConnection" level="off" />
<Logger name="org.apache.commons.dbcp2" level="off"/>
<Logger name="PostgresHandler" level="off"/>
<Logger name="SQLHandler" level="off"/>
<!-- Misc -->
<Logger name="util" level="off" />
<Logger name="exceptions" level="off"/>
<Logger name="Paraver" level="info"/>
<Logger name="DataClaySerializationLib" level="off"/>
<Logger name="DataClayDeserializationLib" level="off"/>
<!-- ROOT LOGGER -->
<!-- Loggers not listed above fall back to this level -->
<Root level="off">
<AppenderRef ref="ConsoleAppender" />
</Root>
</Loggers>
</Configuration>
# dataClay 2.1 stack: logic module plus one Java and one Python data service.
version: '3.4'
services:
  logicmodule:
    image: "bscdataclay/logicmodule:2.1"
    ports:
      - "11034:11034"
    environment:
      - LOGICMODULE_PORT_TCP=11034
      - LOGICMODULE_HOST=logicmodule
      - DATACLAY_ADMIN_USER=admin
      - DATACLAY_ADMIN_PASSWORD=admin
    volumes:
      - ./prop/global.properties:/home/dataclayusr/dataclay/cfgfiles/global.properties:ro
      # Target directory is "logging", matching the dsjava mount below; the
      # previous "loggging" spelling mounted the file at a different path.
      - ./prop/log4j2.xml:/home/dataclayusr/dataclay/logging/log4j2.xml:ro
    stop_grace_period: 5m
    healthcheck:
      interval: 5s
      retries: 10
      test: ["CMD-SHELL", "/home/dataclayusr/dataclay/health/health_check.sh"]
  dsjava:
    image: "bscdataclay/dsjava:2.1"
    ports:
      - "2127:2127"
    # depends_on only orders container start-up; it does not wait for the
    # logicmodule healthcheck to pass.
    depends_on:
      - logicmodule
    environment:
      - DATASERVICE_NAME=DS1
      - DATASERVICE_JAVA_PORT_TCP=2127
      - LOGICMODULE_PORT_TCP=11034
      - LOGICMODULE_HOST=logicmodule
    volumes:
      - ./prop/global.properties:/home/dataclayusr/dataclay/cfgfiles/global.properties:ro
      - ./prop/log4j2.xml:/home/dataclayusr/dataclay/logging/log4j2.xml:ro
    stop_grace_period: 5m
    healthcheck:
      interval: 5s
      retries: 10
      test: ["CMD-SHELL", "/home/dataclayusr/dataclay/health/health_check.sh"]
  dspython:
    image: "bscdataclay/dspython:2.1"
    depends_on:
      - logicmodule
      - dsjava
    environment:
      # Same DATASERVICE_NAME as dsjava (DS1).
      - DATASERVICE_NAME=DS1
      - LOGICMODULE_PORT_TCP=11034
      - LOGICMODULE_HOST=logicmodule
      - DEBUG=True
    volumes:
      - ./prop/global.properties:/home/dataclayusr/dataclay/cfgfiles/global.properties:ro
    stop_grace_period: 5m
    healthcheck:
      interval: 5s
      retries: 10
      test: ["CMD-SHELL", "/home/dataclayusr/dataclay/health/health_check.sh"]
\ No newline at end of file
CHECK_LOG4J_ENABLED=true
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
Log4j2 configuration.
* monitorInterval="60": Log4j2 checks this file for changes at most every 60 seconds.
* status="off": Log4j2's own internal status messages are disabled.
* All output goes through ConsoleAppender, which writes to standard output.
* Every named logger is "off" except Paraver ("info"); the root logger is "off" as well.
-->
<Configuration monitorInterval="60" status="off">
<Appenders>
<Console name="ConsoleAppender" target="SYSTEM_OUT">
<!-- Pattern: ISO8601 timestamp, level, logger name, thread, class:line, message -->
<PatternLayout
pattern="%d{ISO8601} %p [%c] [%t] [%C{1}:%L] %m%n"></PatternLayout>
</Console>
</Appenders>
<Loggers>
<!-- Runtime -->
<Logger name="ClientRuntime" level="off" />
<Logger name="ClientManagementLib" level="off" />
<Logger name="DataClayRuntime" level="off" />
<Logger name="DataServiceRuntime" level="off" />
<Logger name="DataClayObjectLoader" level="off" />
<Logger name="DataClayObject" level="off" /> <!-- This is very verbose! -->
<!-- Data service -->
<Logger name="DataService" level="off" />
<Logger name="ExecutionEnvironmentSrv" level="off" />
<!-- Lockers -->
<Logger name="dataclay.util.classloaders.SyncClass" level="off" />
<Logger name="dataclay.heap.LockerPool" level="off" />
<Logger name="LockerPool" level="off" />
<Logger name="dataclay.util.classloaders.ClassLockers"
level="off" />
<!-- Garbage collection -->
<Logger name="GlobalGC" level="off" />
<Logger name="heap.HeapManager" level="off" />
<Logger name="ReferenceCounting" level="off" />
<Logger name="StorageLocation" level="off" />
<!-- Logic module -->
<Logger name="LogicModule" level="off" />
<Logger name="LMDB" level="off" />
<Logger name="managers" level="off" />
<Logger name="MetaDataService.db" level="off" />
<Logger name="MetaDataService" level="off" />
<!-- Communication -->
<Logger name="io.grpc" level="off" />
<Logger name="io.netty" level="off" />
<Logger name="NettyClientHandler" level="off" />
<Logger name="grpc.client" level="off" />
<Logger name="grpc.client.logicmodule" level="off" />
<Logger name="communication.LogicModule.service" level="off" />
<!-- Databases -->
<Logger name="dataclay.dbhandler" level="off" />
<Logger name="dbhandler.PostgresConnection" level="off" />
<Logger name="org.apache.commons.dbcp2" level="off" />
<Logger name="PostgresHandler" level="off" />
<Logger name="SQLHandler" level="off" />
<!-- Misc -->
<Logger name="util" level="off" />
<Logger name="exceptions" level="off" />
<Logger name="Paraver" level="info" />
<Logger name="DataClaySerializationLib" level="off" />
<Logger name="DataClayDeserializationLib" level="off" />
<!-- ROOT LOGGER -->
<!-- Loggers not listed above fall back to this level -->
<Root level="off">
<AppenderRef ref="ConsoleAppender" />
</Root>
</Loggers>
</Configuration>
#!/bin/sh
# Download the Kafka binary release selected by the KAFKA_VERSION and
# SCALA_VERSION environment variables to
# /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz.
#
# `set -e` is used instead of a shebang flag so that the script also aborts on
# errors when it is started as `sh download_kafka.sh`.
set -e

# Fail early with a clear message when a required variable is missing.
: "${KAFKA_VERSION:?KAFKA_VERSION must be set}"
: "${SCALA_VERSION:?SCALA_VERSION must be set}"

FILENAME="kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz"
TARGET="/tmp/${FILENAME}"
# Fallback location for releases that a mirror does not (or no longer) serve.
ARCHIVE_URL="https://archive.apache.org/dist/kafka/${KAFKA_VERSION}/${FILENAME}"

# Ask the Apache mirror resolver for the preferred mirror of this release.
# A failed lookup must not abort the script: the archive is tried instead.
url=$(curl --stderr /dev/null "https://www.apache.org/dyn/closer.cgi?path=/kafka/${KAFKA_VERSION}/${FILENAME}&as_json=1" | jq -r '"\(.preferred)\(.path_info)"') || url=""
case "${url}" in
    http*) ;;
    # Empty or "nullnull" (fields missing from the JSON answer): use the archive.
    *) url="${ARCHIVE_URL}" ;;
esac

echo "Downloading Kafka from $url"
if ! wget -q "${url}" -O "${TARGET}"; then
    if [ "${url}" = "${ARCHIVE_URL}" ]; then
        echo "Download of ${FILENAME} failed" >&2
        exit 1
    fi
    echo "Mirror download failed, retrying from ${ARCHIVE_URL}" >&2
    wget -q "${ARCHIVE_URL}" -O "${TARGET}"
fi
\ No newline at end of file
# dataClay Java data service image with the Kafka command-line tools added.
FROM bscdataclay/dsjava:2.1
# Install packages:
# curl, wget and jq are used by download_kafka.sh.
# NOTE(review): --allow-unauthenticated skips package signature checks and
# ">/dev/null" hides the install log; both kept as in the original - confirm
# they are still required for this base image.
RUN apt-get update \
    && apt-get install --no-install-recommends -y --allow-unauthenticated curl wget jq >/dev/null \
    && rm -rf /var/lib/apt/lists/*
# KAFKA
ENV KAFKA_VERSION=2.4.1
ENV SCALA_VERSION=2.12
ENV KAFKA_HOME=/opt/kafka
ENV PATH=${PATH}:${KAFKA_HOME}/bin
COPY ./download_kafka.sh ./download_kafka.sh
# Download kafka
# Download, unpack and delete the archive in a single layer: a file removed in
# a later layer would still be stored in the image.
RUN mkdir -p ${KAFKA_HOME} \
    && ./download_kafka.sh \
    && tar -xzf /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz --directory ${KAFKA_HOME} --strip-components=1 \
    && rm -f /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz
#COPY ./execClasses ${DATACLAY_HOME}/execClasses
# Execute
# Don't use CMD in order to keep compatibility with singularity container's generator
ENTRYPOINT ["dataclay-java-entry-point", "es.bsc.dataclay.dataservice.server.DataServiceSrv"]
\ No newline at end of file
# dataClay Python data service image with the Kafka command-line tools added.
FROM bscdataclay/dspython:2.1
# Install packages:
# curl, wget and jq are used by download_kafka.sh.
# NOTE(review): --allow-unauthenticated skips package signature checks and
# ">/dev/null" hides the install log; both kept as in the original - confirm
# they are still required for this base image.
RUN apt-get update \
    && apt-get install --no-install-recommends -y --allow-unauthenticated curl wget jq >/dev/null \
    && rm -rf /var/lib/apt/lists/*
# KAFKA
ENV KAFKA_VERSION=2.4.1
ENV SCALA_VERSION=2.12
ENV KAFKA_HOME=/opt/kafka
ENV PATH=${PATH}:${KAFKA_HOME}/bin
COPY ./download_kafka.sh ./download_kafka.sh
# Download kafka
# Download, unpack and delete the archive in a single layer: a file removed in
# a later layer would still be stored in the image.
RUN mkdir -p ${KAFKA_HOME} \
    && ./download_kafka.sh \
    && tar -xzf /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz --directory ${KAFKA_HOME} --strip-components=1 \
    && rm -f /tmp/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz
#COPY ./deploy ${DATACLAY_HOME}/deploy
# Execute
# Don't use CMD in order to keep compatibility with singularity container's generator
ENTRYPOINT ["dataclay-python-entry-point", "-m", "dataclay.executionenv.server"]
# NOTE(review): DATACLAY_TAG is declared but no instruction in this file
# references it; the base image tags below are hard-coded.
ARG DATACLAY_TAG
# Builder stage: turn the SQL dump (LM.sqlite) into a SQLite database file.
FROM ubuntu:18.04 AS lmdb
# update and install share one layer so a cached, stale package index is never
# reused for a later install.
RUN apt-get update \
    && apt-get install -y --no-install-recommends sqlite3 libsqlite3-dev \
    && rm -rf /var/lib/apt/lists/*
COPY ./LM.sqlite /tmp/dataclay/dump.sql
RUN mkdir -p "/dataclay/storage" \
    && sqlite3 "/dataclay/storage/LM" ".read /tmp/dataclay/dump.sql"
# Final stage: logic module image shipping the pre-populated database.
FROM bscdataclay/logicmodule:2.1
# Refer to the builder stage by name rather than by index.
COPY --from=lmdb /dataclay/storage/LM /dataclay/storage/LM
# The command can contain additional options for the Java Virtual Machine and
# must contain a class to be executed.
ENTRYPOINT ["dataclay-java-entry-point", "es.bsc.dataclay.logic.server.LogicModuleSrv"]
# Don't use CMD in order to keep compatibility with singularity container's generator
\ No newline at end of file
#!/bin/bash
# Push the three Elastic images to the registry.
# Absolute path of the directory that contains this script.
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
# Quoted so that paths containing spaces work; stop if the directory is missing.
pushd "$SCRIPTDIR" || exit 1
# The messages name the "-elastic" images, which are the ones being pushed.
echo " ===== Pushing docker bscdataclay/logicmodule-elastic ====="
docker push bscdataclay/logicmodule-elastic
echo " ===== Pushing docker bscdataclay/dsjava-elastic ====="
docker push bscdataclay/dsjava-elastic
echo " ===== Pushing docker bscdataclay/dspython-elastic ====="
docker push bscdataclay/dspython-elastic
popd
/classes.pyc
from dataclay import DataClayObject, dclayMethod
class DataClayKafkaMixin(DataClayObject):
    """Mixin adding a method that publishes an object's public fields to Kafka.

    The fields are the non-routine members of the class whose names do not
    start with an underscore.
    """

    @dclayMethod()
    def send_to_kafka(self):
        """Send this object's public fields to a Kafka topic as one JSON message.

        The broker address is read from the KAFKA_ADDR environment variable
        (default "kafka:9092") and the topic from KAFKA_TOPIC (default "test").
        The message is written to the standard input of
        ``kafka-console-producer.sh``, which must be on the PATH.

        Failures of the producer are reported with ``print`` and are not
        raised, matching the original best-effort behaviour.
        """
        # Imports stay inside the method, as in the original code; presumably
        # required because the method body is executed remotely - TODO confirm.
        import inspect
        import json
        import os
        import subprocess

        # Collect every non-routine, non-underscore member declared on the class
        # and read its current value from this instance.
        attributes = inspect.getmembers(self.__class__, lambda a: not inspect.isroutine(a))
        field_values = {}
        for fieldname, _ in attributes:
            if not fieldname.startswith('_'):
                field_values[fieldname] = getattr(self, fieldname)
        print(field_values)
        # convert to JSON string
        jsonStr = json.dumps(field_values)

        KAFKA_ADDR = os.getenv('KAFKA_ADDR', "kafka:9092")
        KAFKA_TOPIC = os.getenv('KAFKA_TOPIC', "test")
        # Argument list instead of a shell string: the JSON payload is passed on
        # stdin, so quotes in the data cannot break or alter the command, and no
        # bash-only "<<<" here-string is needed (os.system runs /bin/sh).
        cmd = ['kafka-console-producer.sh', '--broker-list', KAFKA_ADDR, '--topic', KAFKA_TOPIC]
        print(" ".join(cmd))
        try:
            producer = subprocess.Popen(cmd, stdin=subprocess.PIPE)
            # communicate() writes the message, closes stdin and waits for exit.
            producer.communicate(input=(jsonStr + "\n").encode("utf-8"))
        except OSError as err:
            # e.g. the producer script is not installed or not executable
            print("Could not run kafka-console-producer.sh: %s" % err)
            return
        if producer.returncode != 0:
            print("kafka-console-producer.sh exited with status %s" % producer.returncode)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment