# Base image: slim Debian "buster" variant carrying CPython 3.8.
FROM python:3.8-slim-buster

# Build-time inputs, supplied with `--build-arg`. None of them has a default,
# so an omitted argument expands to an empty string wherever it is used.
# TRAININGDATA / TESTDATA are handed to app/format_input.py further down.
ARG TRAININGDATA
ARG TESTDATA
# MODEL is not referenced by any instruction in this file.
# NOTE(review): build args are exposed as environment variables to RUN steps,
# so one of the scripts may still read it - confirm before removing.
ARG MODEL
# MODELTYPE / LANGUAGE are handed to app/train_a_model.py further down.
ARG MODELTYPE
ARG LANGUAGE

# Interpreter switches: do not write .pyc files, and keep stdout/stderr
# unbuffered so log output appears immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Install the C build toolchain (presumably required by pip packages that
# compile native code - confirm against requirements.txt).
# - update + install + cleanup share one layer so the apt index never persists.
# - --no-install-recommends keeps optional packages out of the image; libc6-dev
#   is only a "Recommends" of gcc, so it is listed explicitly to keep the
#   compiler usable.
# - The cleanup glob must be `*`: a narrower pattern such as `*s` removes only
#   the entries whose names end in "s" and leaves the rest of the index behind.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        gcc \
        libc6-dev \
        make \
    && rm -rf /var/lib/apt/lists/*

# Relative paths in later instructions, and the default CMD, resolve from here.
WORKDIR /code

# Create a virtual environment and put its bin/ directory first on PATH, so
# every subsequent `python` / `pip` invocation (build steps and CMD alike)
# resolves to the environment's executables.
ENV VIRTUAL_ENV=/opt/venv
RUN python3 -m venv "${VIRTUAL_ENV}"
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

# Install PyTorch in its own layer, before requirements.txt enters the image,
# so that editing requirements.txt does not invalidate this layer and force the
# CUDA 11.6 wheels to be downloaded again.
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN pip install --no-cache-dir torch torchvision torchaudio \
        --extra-index-url https://download.pytorch.org/whl/cu116

# Remaining Python dependencies; this layer is rebuilt only when
# requirements.txt (or an earlier layer) changes.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir -r /code/requirements.txt
#RUN python -m spacy download en_core_web_sm

# Documentation of the listening port; it must agree with the `--port` value
# passed to uvicorn in CMD (8002), otherwise operators and tooling are told to
# publish a port nothing is bound to.
EXPOSE 8002

# Application source tree.
COPY ./app /code/app

# Directory skeleton under app/spacy_model. One `mkdir -p` call receives every
# path and creates any missing parent levels along the way.
RUN mkdir -p \
        /code/app/spacy_model/corpus/trainingdata \
        /code/app/spacy_model/output/model-best \
        /code/app/spacy_model/corpus/spacy-docbins \
        /code/app/spacy_model/intermediate/results \
        /code/app/spacy_model/intermediate/docbins \
        /code/app/spacy_model/intermediate/model \
        /code/app/spacy_model/intermediate/trainingdata

# Populate the training-data and model directories from the build context.
COPY ./data/trainingdata /code/app/spacy_model/corpus/trainingdata/
COPY ./data/model /code/app/spacy_model/output/model-best/

# Both scripts below execute while the image is being built, so whatever they
# write ends up inside the image. The build args are left unquoted: an unset
# argument therefore drops out of argv instead of arriving as an empty string.

# Step 1 - input formatting (described by the original author as validating
# any CSV files; see app/format_input.py for the actual checks).
RUN python /code/app/format_input.py $TRAININGDATA $TESTDATA

# Step 2 - model training entry point (described by the original author as
# checking whether a new model must be trained; see app/train_a_model.py).
RUN python /code/app/train_a_model.py $MODELTYPE $LANGUAGE

# Default process: serve the `app` object from app/main.py with uvicorn,
# listening on every interface on port 8002. Exec (JSON) form, so uvicorn is
# started directly rather than through a shell.
CMD ["uvicorn", "app.main:app", \
     "--host", "0.0.0.0", \
     "--port", "8002"]
