#!/bin/bash
# WF 2022-08-18
# k10plus PPN Id matching
# PICA input file that is scanned for PPN / volume pairs
picafile="${HOME}/.ceurws/ceurws.pica"
# JSON result of the SPARQL matching query; reused when it already exists
jsonmatch="${HOME}/.ceurws/ceurws-ppn.json"
# generated batch script with the claims to add
# NOTE(review): "ceurs" looks like a typo for "ceurws" - confirm before renaming
ppnbatch="${HOME}/.ceurws/ceurs-ppn"

# Abort early when the PICA input file is not available.
# The path is quoted: unquoted, a path containing whitespace makes the test
# fail with "too many arguments", which counts as false and would let the
# script continue without its input.
if [ ! -f "$picafile" ]
then
  # diagnostics belong on stderr
  echo "$picafile missing see https://github.com/WolfgangFahl/pyCEURmake/issues/26" >&2
  exit 1
fi

#######################################
# Create the JSON match file.
# Builds a SPARQL query whose VALUES clause pairs every CEUR volume number
# found in the PICA file with the PPN of its record, runs the query with
# sparqlquery and stores the JSON result.
# Globals:
#   picafile (read) - PICA file the (volume, PPN) pairs are taken from
# Arguments:
#   $1 - path of the JSON file to create
# Outputs:
#   writes the query result to $1; diagnostics go to stderr
# Returns:
#   0 on success, non-zero when the query could not be built or executed
#   (in that case $1 is not created)
#######################################
createJson() {
  local l_jsonmatch="$1"
  local sparql l_tmpjson l_status=0

  # unpredictable temporary name instead of /tmp/<name>.<pid>
  sparql=$(mktemp "${TMPDIR:-/tmp}/ceurws-sparql.XXXXXX") || return 1

  # quoted delimiter: the query text is taken literally by the shell
  cat << 'EOF' > "$sparql"
#
# get CEUR-WS Proceedings records by Volume with linked Event and EventSeries
#
# WF 2022-08-13
#
# the Volume number P478 is sometimes available with the proceedings item and sometimes as a qualifier
# of
#
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX schema: <http://schema.org/>
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT ?sVolume ?ppnToSet ?ppnId ?item ?itemLabel
WHERE {
  VALUES (?sVolume ?ppnToSet) {
EOF

  # get ppns
  # 003@ $01663037973
  # 036E $aCEUR workshop proceedings$lvol-2314
  if ! awk '
  # remember the PPN of the current record
  /003@/ {
    gsub("[$]0","",$2)
    ppn=$2
  }
  # series statement of the record; the field tag is compared (the former
  # assignment never checked it), a prefix match also accepts an
  # occurrence suffix after the tag
  $1 ~ /^036E/ && /aCEUR/ {
    #.l([vV]ol(ume).)
    # quirks
    #   ("proceedikngs$lvol-1185" "1023022214")
    #   ("1493$pAI*IA" "848525124")
    #   ("1492,2" "848521706")
    #   ("1492,1" "848521617")
    gsub("[Pp]roceedi[k]?ngs.l([vV]ol(ume)?[-.]?)?","",$4)
    # strip the literal suffix $pAI*IA; $ and * are put into bracket
    # expressions so that they are not interpreted as regex operators
    gsub("[$]pAI[*]IA","",$4)
    # the volume number is in the next field when nothing is left here
    if (length($4)==0) { volf=$5 } else { volf=$4 }

    #("3185" "P")
    printf("    (\"%s\" \"%s\")\n",volf,ppn)
  }
  ' "$picafile" >> "$sparql"
  then
    echo "could not extract the ppns from $picafile" >&2
    rm -f -- "$sparql"
    return 1
  fi

  cat << 'EOF' >> "$sparql"
   }
   ?item rdfs:label ?itemLabel.
   FILTER(LANG(?itemLabel) = "en")
   # Instance of Proceedings
   ?item wdt:P31 wd:Q1143604.
   # Part of the series
   ?item p:P179 ?partOfTheSeries.
   # CEUR Workshop proceedings
   ?partOfTheSeries ps:P179 wd:Q27230297.
   # Volumes via a a qualifier of the part of the series relation
   ?partOfTheSeries pq:P478 ?sVolume.
   # K10plus PPN ID
   OPTIONAL {
     ?item wdt:P6721 ?ppnId.
   }
} ORDER BY DESC (xsd:integer(?sVolume))
EOF

  # Run the query into a temporary file and only move it into place on
  # success: an existing result file is reused by later runs, so a failed
  # query must not leave a truncated file behind.
  l_tmpjson="${l_jsonmatch}.tmp.$$"
  if sparqlquery -qf "$sparql" -f json > "$l_tmpjson"
  then
    mv -f -- "$l_tmpjson" "$l_jsonmatch" || l_status=$?
  else
    l_status=$?
    rm -f -- "$l_tmpjson"
    echo "sparqlquery failed with exit code $l_status" >&2
  fi
  rm -f -- "$sparql"
  return "$l_status"
}

# Create the JSON match file only when it does not exist yet; an existing
# file is reused.
if [ ! -f "$jsonmatch" ]
then
  if ! createJson "$jsonmatch"
  then
    echo "creating $jsonmatch failed" >&2
    # the file did not exist before this call, so anything present now is
    # the leftover of the failed attempt; remove it so that the next run
    # does not reuse it
    rm -f -- "$jsonmatch"
    exit 1
  fi
else
  echo "$jsonmatch already exists"
fi
#######################################
# Convert the pretty printed JSON query result into a batch script of
# "wd add-claim" commands.
# Assumes one "key": "value" pair per line with string values, as the field
# positions used below ($2 = key, $4 = value) imply - TODO confirm against
# the sparqlquery output format.
# A record is emitted when its sVolume line is read, so sVolume has to be
# the last key of each record.
# Arguments:
#   $1 - timestamp to put into the header of the generated script
# Inputs:
#   pretty printed JSON on stdin
# Outputs:
#   the batch script on stdout
#######################################
createPpnBatch() {
  local l_timestamp="$1"
  awk -v timestamp="$l_timestamp" '
BEGIN {
  # split at double quotes: $2 is the key and $4 the value of a line
  FS="\""
  # "#!" is required for a valid shebang line of the generated script
  print "#!/bin/bash"
  print "# generated by k10plus script "
  print "# https://github.com/WolfgangFahl/pyCEURmake/blob/main/scripts/k10plus"
  print "# " timestamp
}
# the key field is compared exactly, so itemLabel lines (or values that
# merely contain a key name) are not mistaken for another key
$2 == "item" {
  # keep only the entity id of the entity URL
  gsub("http://www.wikidata.org/entity/","",$4)
  item=$4
  next
}
$2 == "ppnToSet" {
  ppnToSet=$4
  next
}
$2 == "ppnId" {
  ppnId=$4
  next
}
$2 == "sVolume" {
  volume=$4
  printf("# Volume %s\n",volume)
  if (length(ppnId)==0) {
    # no PPN claim found by the query: generate the command adding it
    printf("wd add-claim %s P6721 %s\n",item,ppnToSet)
  } else {
    printf("# has ppnId %s\n",ppnId)
  }
  # reset the state for the next record
  item=""
  volume=""
  ppnToSet=""
  ppnId=""
  next
}
'
}

timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
# -S sorts the keys of every object, which makes sVolume the last key of
# each record regardless of the order the query result was written in
jq -S . "$jsonmatch" | createPpnBatch "$timestamp" > "$ppnbatch"
chmod +x "$ppnbatch"
echo "$ppnbatch"
# open the generated script in the atom editor when it is installed
if command -v atom > /dev/null 2>&1
then
  atom "$ppnbatch"
fi
