#!/usr/bin/env python3
from lxml import etree
from sys import stdin, stderr, argv
from chardet import detect
# Text of the input document; starts out empty.
html = ""
# General-purpose integer counter; starts at zero.
i = 0

def show_help():
    """Print the usage message to stdout and terminate the program.

    Raises:
        SystemExit: always, with exit status 0.
    """
    helpstr = """
xp takes data from stdin, and supplies the result of an xpath selection to stdout.
For example (assuming xp is in your $PATH):

curl -s example.com | xp '//title/text()'
(result should be 'Example Domain')

xp with an invalid xpath expression or an empty stdin will print this help message"""
    print(helpstr)
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the site module for interactive sessions and is not
    # guaranteed to exist (e.g. under `python -S`).
    raise SystemExit(0)

# When stdin is a terminal nothing has been piped in, so there is no
# document to query: show the usage text (show_help terminates the program).
if stdin.isatty():
    show_help()

# At least one XPath expression is required on the command line.
if not argv[1:]:
    show_help()

# Validate every argument. Surrounding quote characters are ignored for the
# check only; an expression has to begin with '/' or '('. The expression
# itself is kept exactly as given, and when several arguments are supplied
# the last one is the one that ends up in `xpath`.
for arg in argv[1:]:
    if not arg.strip("\"'").startswith(('/', '(')):
        show_help()
    else:
        xpath = arg
# Read the document from stdin as raw bytes, line by line, and build the
# text that is handed to the parser.
at_start = True  # True until the first line that is not skipped
decoded_lines = []
for bline in stdin.buffer:
    if at_start:
        # Lines at the very start of the input that mention 'encoding='
        # (typically an XML declaration) are dropped; presumably because lxml
        # refuses str input that carries an encoding declaration.
        try:
            if 'encoding=' in bline.decode():
                continue
        except UnicodeDecodeError:
            # Not valid UTF-8, so the line cannot be inspected here; fall
            # through and let chardet handle it like any other line.
            pass
        at_start = False
    # Guess the encoding of this line so it can be decoded to text.
    # NOTE: detection is done per line, so each line is decoded with its
    # own guess.
    guess = detect(bline)
    if guess['encoding'] is not None:
        # Lines for which no encoding could be guessed are left out.
        decoded_lines.append(bline.decode(guess['encoding'], errors='ignore'))

# Join once at the end instead of growing a string inside the loop.
html = "".join(decoded_lines)

# The usage text promises the help message for empty input, so bail out
# before handing an empty (or whitespace-only) document to the parser.
if not html.strip():
    show_help()

htmlparser = etree.HTMLParser()
tree = etree.fromstring(html, htmlparser)
# Guard against a parse that produced no root element.
if tree is None:
    show_help()

try:
    xpaths = tree.xpath(xpath)
except etree.XPathError:
    # XPathError is the common base of XPathEvalError and XPathSyntaxError,
    # so any invalid expression leads to the help message.
    show_help()

# An XPath expression may evaluate to a single number, boolean or string
# instead of a list of nodes; wrap it so the loop below handles both shapes.
if not isinstance(xpaths, list):
    xpaths = [xpaths]

for result in xpaths:
    if isinstance(result, (str, bool, float)):
        # Text/attribute results (str subclasses) and scalar values are
        # printed as they are.
        print(result)
    else:
        # Element results: print the element's leading text.
        print(result.text)
