#!/usr/bin/env python3

import os
import argparse
import fileinput

import regex

VERSION = '1.0.2'

# Command-line arguments
# The epilog is a raw string so that the regexp examples in it (\p, \S, \w,
# \s) are kept verbatim rather than being parsed as invalid string escape
# sequences, which newer Python versions warn about at compile time.
# RawDescriptionHelpFormatter keeps the epilog's line breaks in --help output.
parser = argparse.ArgumentParser(description='List symbols in input.',
                                 epilog=r'''
The default symbol type is words (-s "\p{L}+"); other useful settings
include:

  non-white-space characters: -s "\S+"
  alphanumerics and underscores: -s "\w+"
  XML tags: -s "<([a-zA-Z_:][a-zA-Z_:.0-9-]*)[\s>]"
''',
                                 formatter_class=argparse.RawDescriptionHelpFormatter)
# -s/--symbol: regular expression defining what counts as a symbol.
parser.add_argument('-s', '--symbol', metavar='REGEXP', default=r'\p{L}+',
                    help='symbols are given by REGEXP')
# -V/--version: print the program name and version banner, then exit.
parser.add_argument('-V', '--version', action='version',
                    version=f'%(prog)s {VERSION} (17 Sep 2022) by Reuben Thomas <rrt@sc3d.org>')
# Zero or more input files.
parser.add_argument('file', metavar='FILE', nargs='*')

args = parser.parse_args()

# Compile symbol-matching regexp; a malformed REGEXP is reported through
# parser.error, i.e. as a command-line usage error rather than a traceback.
try:
    pattern = regex.compile(args.symbol)
except regex.error as err:
    parser.error(err.args[0])

# Process input: read the given files in order, or '-' (standard input) when
# no FILE argument was supplied, and print every match on its own line.
# The context manager closes the current input file even if reading or
# printing fails part-way through.
with fileinput.input(files=args.file or ['-']) as lines:
    for line in lines:
        # NOTE: findall yields the captured group instead of the whole match
        # when REGEXP contains a single capture group.
        for s in pattern.findall(line):
            print(s)
