########################################
# pattern.py
#

from re import compile 

# Patterns for note labels of the form .N (a period followed by digits).
# Raw strings are used so that the regex escapes (\A, \{, \d, ...) are not
# treated as invalid Python string escapes.

# An entire string of the form {".N"}; group 1 is the .N part.
premisenote = compile(r'\A\{"(\.\d+)"\}\Z')

# A {'.N'} label anywhere in the text; group 1 is the whole label,
# braces and quotes included.
ancenote = compile(r"(\{'\.\d+'\})")

# The same {'.N'} label, but group 1 is only the .N part.
prancenote = compile(r"\{'(\.\d+)'\}")

# A single-quoted '.N'; group 1 is the .N part.
insetnote = compile(r"'(\.\d+)'")

#s = '\A\s*\\\\(chapter|(sub)*section)\W.*'
#latex_major_unit = compile(s)

# A plain-TeX definition of the form  \mathchardef\NAME="HEX .
# Group 1 is NAME (letters only); group 2 is the run of digits and
# uppercase A-F after the double quote.  The character classes used to be
# written [A-Z,a-z] and [A-F,\d]; the comma is not a range separator in a
# regex class, so it allowed literal commas into both groups.
chardef = compile(r'\\mathchardef\\([A-Za-z]*)="([A-F\d]*)')

# A line that starts (after optional white space) with \chap, one or more
# non-word characters, a number and a period; group 1 is the number.
chapthead = compile(r'\A\s*\\chap\W+(\d+)\..*')

# A line that starts (after optional white space) with \section followed
# by a non-word character.
latex_section = compile(r'\A\s*\\section\W.*')

# Uncompiled pattern fragments with the same shape as latex_section minus
# the unit name: optional white space plus a backslash, and a non-word
# character plus the rest of the line.
# NOTE(review): presumably callers join these around a unit name -- confirm.
latex_unit_prefix = r'\A\s*\\'
latex_unit_suffix = r'\W.*'

# The alternate spellings of 'prop' have been removed.
# Earlier, narrower versions of this pattern were assigned and immediately
# overwritten; only the final form is kept.
#
# Matches, at the start of the string, \prop (optionally followed by one
# lowercase letter), non-word characters, a dotted number, any characters
# other than '$' and digits, a '$', and the rest of the line.
#   group 1          the control word without its backslash, e.g. 'prop'
#   group 'refnum'   the whole dotted number, e.g. '1.2' (also group 2)
#   group 3          the first number component
#   groups 4 and 5   the last '.N' component, with and without the period
#   group 6          everything after the opening '$'
thmnum = compile(r'\A\\(prop[a-z]?)\W+(?P<refnum>(\d+)(\.(\d+))+)[^\$0-9]*\$(.*)')  #Deprecate this
propnum = thmnum


# All patterns below are raw strings, so each backslash reaches the regex
# engine exactly as written: \\ matches one literal backslash.

# \input NAME.ldf or \input NAME.tdf after optional white space.
# Group 1 is the file name, group 2 its stem, group 3 the extension.
inputfile = compile(r'\s*\\input\s+((.+)\.([lt]df))')

# A whole line of the form  %keyword: first rest .
# Group 1 is the keyword with its colon, group 2 everything after it
# (trailing white space removed), group 3 the first white-space-delimited
# word of that, group 4 the remainder.
directive = compile(r'\s*%([_A-Za-z]+:)\s+((\S*)\s*(.*?))\s*\Z')

# \lineX (X one lowercase letter) at the start of a line, then a '$'.
# Group 1 is the letter, group 2 the text after the '$'.
line = compile(r'\A\s*\\line([a-z])\s*\$(.*)')

# \By preceded on the line by neither '$' nor '%' and followed by a
# non-word character; group 1 is the text from that character on.
by = compile(r'\A[^\$\%]*\\By(\W.*)')

# As above for \Bye.
bye = compile(r'\A[^\$\%]*\\Bye(\W.*)')

# Thanks to Karl Berry for enabling this:
# \note preceded by no '%', then a number, white space and text.
# Group 1 is the number, group 2 the text up to and including the first
# '$', group 3 the rest of the line.
note = compile(r'\A[^\%]*\\note\W+(\d+)\s+([^\$]*\$)(.*)')

# Either a '%' not preceded by a backslash (named group 'TeXcomment') or
# the control sequence \noparse (group 2).
Noparse = compile(r'(?<!\\)(?P<TeXcomment>%)|(\\noparse)')

# A '%' not preceded by a backslash.
TeXcomment = compile(r'(?<!\\)(%)')

# A run of one or more '$' not preceded by a backslash.
TeXdollar = compile(r'(?<!\\)(\$+)')

# Environment names, written as regex fragments: the star of the starred
# variants is escaped so that it matches a literal '*'.  Raw strings keep
# the backslash without relying on an invalid Python string escape.
math_environments = ['math', 'displaymath', 'eqnarray', r'eqnarray\*', 'align', r'align\*']

# Each pattern is one big group of alternatives, built up as a string and
# closed with ')' once the environment alternatives have been appended.
#   s  math openers:  unescaped '$' run, \( or \[
#   t  math closers:  '.' + '$' run, unescaped '$' run, \) or \]
#   S, T  the same openers/closers, with an unescaped '%' tried first
s = r'((?<!\\)(\$+)|\\\(|\\\['
t = r'((\.(\$+))|(?<!\\)(\$+)|\\\)|\\\]'  #Note: a period is allowed!!
S = r'((?<!\\)%|(?<!\\)(\$+)|\\\(|\\\['
T = r'((?<!\\)%|(?<!\\)(\$+)|\\\)|\\\]'
for x in math_environments:
	# One \begin{...} alternative for the openers, one \end{...} for the closers.
	s += r'|\\begin{' + x + '}'
	S += r'|\\begin{' + x + '}'
	t += r'|\\end{' + x + '}'
	T += r'|\\end{' + x + '}'
s += ')'
S += ')'
t += ')'
T += ')'
beginmath = compile(s)
endmath = compile(t)
beginmath_or_comment = compile(S)
endmath_or_comment = compile(T)

# Optional white space (group 1) followed by a run of '$' (group 2).
blankline = compile(r'(\s*)(\$+)')

# A single tab or space.
hwhite = compile(r'[\t ]')

# NOTE(review): the first alternative, [\t ]*, can match the empty string,
# so it always succeeds and the \line and \hskip alternatives after it are
# never tried.  Confirm whether they were meant to come first.
leftmargin = compile(r'[\t ]*|\\line[a-z][\t ]*|\\hskip(\d)+pt[\t ]*')

# A dotted number at the start of the string whose leading component may be
# empty (e.g. '1.2' or '.5').  Group 1 is the whole number, group 2 the
# leading digits, group 3 the last '.N' component, group 4 any lowercase
# letters that follow, group 5 the rest.
ref = compile(r'\A((\d*)(\.\d+)+)([a-z]*)(.*)')

# Like ref, but the leading digits and the letter suffix are both required.
outfileref = compile(r'\A((\d+)(\.\d+)+)([a-z]+)(.*)')

# A dotted number N.N[.N...] that is not part of a longer number: preceded
# by the start of the string or a non-digit, and followed by the end of the
# string or a character other than '.', a lowercase letter or a digit.
# Group 2 is the whole number.
propref = compile(r"(\A|\D)((\d+)((\.\d+)+))(\Z|[^\.a-z0-9])")

# A '.N' at the start of the string or directly after a non-digit.
# Group 1 is the text before the period, group 2 the digits, group 3 the rest.
noteref = compile(r'(\A|.*\D)\.(\d+)(.*)')

# A string starting with $...$ ; group 1 is the text between the dollar
# signs, group 2 everything after the closing one.
TeXmath = compile(r'\A\$([^\$]*)\$(.*)')


# A line containing \noparse with no '$' before it.
noparse = compile(r'\A[^\$]*\\noparse.*')

# z_{N}; group 1 is N.
bvar = compile(r'z_\{(\d+)\}')

# In the raw strings below, \\ matches the backslash that starts a TeX
# control sequence.

# \q^{M}_{N} or \w^{M}_{N} at the start of the string; groups 1 and 2 are
# M and N.
newschem = compile(r'\A\\[qw]\^\{(\d+)\}_\{(\d+)\}')

# A control sequence made of letters from p, q, r followed by 'var', e.g.
# \pvar or \pqvar; group 1 is the letters before 'var'.
gensent = compile(r'\A\\([pqr]+)var')

# A control sequence made of letters from p, q, r (or from u, v, w)
# followed by 'bar' and any number of p's, e.g. \pbar or \uvbarpp.
# Group 1 is the letters before 'bar', group 2 the trailing p's.
genschem = compile(r'\A\\([pqr]+|[uvw]+)bar(p*)')
# A Token consists of 
#
#Either:
#    1. a sequence of digits
#OR
#    2. One of the following punctuation marks:
#         .  <  >  ;  /  :  [  ]  (  +  )  =  -  * , & { } |
#OR
#    3. One of these slashed TeX symbols:
#         \{  \}  \.   \_  \&  \%  \# \, \< \; \!
#       (The pattern accepts \< here, not \>; \> may have been intended.)
#OR
#    4. Either:
#            a.  A single letter
#OR 
#            b.  A pair of braces { } enclosing non-brace characters
#OR
#            c.  An alphabetic control sequence, a backslash followed by letters
#
#       optionally followed by
#            d.  A prime sequence, a backslash followed by a sequence of p's not
#                followed by a letter
#
#       optionally followed by any number of sequences consisting of:
#            e.  A TeX superscript ^ or subscript _
#      
#            followed by
#            Either
#                i) a pair of braces { } enclosing non-brace characters
#            OR 
#                ii) an alphabetic control sequence, a backslash followed by letters
#            OR
#                iii) any single non-slash character

# The token pattern, written as adjacent raw string literals (one per kind
# of token) instead of a single non-raw literal with backslash-newline
# continuations.  The resulting pattern text is unchanged.
#   group 1  leading white space
#   group 2  the token itself
#   group 3  for a letter, brace group or control word: the token without
#            its prime sequence and sub/superscripts; None for the other
#            token kinds
#   group 4  trailing white space
token = compile(
	# leading white space, then a sequence of digits
	r"(\s*)(\d+|"
	# a single punctuation mark
	r"[\.<>;/:\[\]\(\+\)=\-\*\,&\{\}\|]|"
	# a backslash followed by one of  { . _ } & % , < ; ! #
	r"\\[\{\._\}\&\%\,<;\!\#]|"
	# a letter, a brace group, or an alphabetic control sequence
	r"([A-Za-z]|\{[^\{]*\}|\\[A-Za-z]+)"
	# optional prime sequence \p, \pp, ... then any number of ^ or _
	# decorations, then trailing white space
	r"(?:\\p+(?![A-Za-z]))?(?:[\^_](?:\{[^\{]*\}|\\[A-Za-z]+|[^\\]))*)(\s*)"
)
# token = pattern.token.match(...).group(2)
# variable stripped of decoration = pattern.token.match(...).group(3)

# A complete line consisting of any mix of $...$ formulas and runs of the
# characters S H U ; : ! ( ) + - , followed by \C and one final $...$
# formula.  Group 4 is that final formula.  Only the last of the
# successive versions of this pattern is kept.
s = (
	r"\A\s*"
	# zero or more items: a $...$ formula, or a run of the listed characters
	r"((\$[^\$]+\$\s*)|([SHU\;\:\!\(\)\+\-\,]+\s*))*"
	# the \C separator
	r"\\C\s*"
	# the formula after \C
	r"(\$[^\$]+\$)"
	r"\s*\Z"
)
inference_rule = compile(s)

#These are not accepted as tokens yet so they do not work:
# For both patterns: group 1 is the optional size prefix with its
# backslash, group 2 the prefix name, group 3 the delimiter itself.
TeX_leftdelimiter = compile(r"(\\(bigl|Bigl|biggl|Biggl|left))?(\(|\[|\\lfloor|\\lceil|\\langle)")
# The right-hand pattern used to list \( and \[ (copied from the left-hand
# one), so it matched opening brackets; it now matches ) and ].
TeX_rightdelimiter = compile(r"(\\(bigr|Bigr|biggr|Biggr|right))?(\)|\]|\\rfloor|\\rceil|\\rangle)")

# Maps each right delimiter to the left delimiter it pairs with.
rmatchd = { ')':'(',']':'[','}':'{','rfloor':'lfloor','rceil':'lceil','rangle':'langle'}

# The right delimiters that rmatch rewrites.  The names are matched without
# their backslash, so '\rfloor' becomes '\lfloor'.
# NOTE(review): '}' is a key of rmatchd but is not matched here, so rmatch
# leaves '}' untouched -- confirm that this is intended.
rmatchp = compile(r'\)|\]|rfloor|rceil|rangle')


def rmatchf(x):
	"""Return the left delimiter paired with the text matched by x.

	x is a match object.  The result is None when the matched text is
	not a key of rmatchd.
	"""
	return rmatchd.get(x.group(0))

def rmatch(x):
	"""Return the string x with each right delimiter matched by rmatchp
	replaced by its left counterpart from rmatchd."""
	return rmatchp.sub(rmatchf,x)
 
#ignore_token = compile(r"\A\\,|\\>|\\;|\\!|\\cr|&\Z")

# Splits a string into a leading run of characters that are not digits,
# white space, '.' or '$' (group 1) and the rest (group 2).
puncts = compile(r"\A([^\d\s\.\$]*)(.*)")

# Group 1 is everything after the first '$'.
dollar = compile(r"\A[^\$]*\$(.*)")

# Group 1 is the text before the first comma or parenthesis; group 2 starts
# at that character and runs to the end of the line.
findsingle = compile(r"\A([^,\)\(]*)([,\)\(]+.*)")

# An alphabetic control sequence whose backslash is not itself preceded by
# a backslash.  The pattern is also left bound to the module-level name s,
# as it was before.
s = r'(?<!\\)(\\[A-Za-z]+)'
TeX_macro = compile(s)

# A run of digits.
nums = compile(r"(\d+)")

if __name__ == '__main__':
	# Interactive check of the token pattern: read strings until an empty
	# line or end of input, printing the match groups and the end offset of
	# each string that begins with a token.
	while True:
		try:
			repeat = input("Enter possible token string: ")
		except EOFError:
			# End of input (e.g. Ctrl-D or a closed pipe) ends the session
			# quietly instead of with a traceback.
			break
		if not repeat:
			break
		found = token.match(repeat)
		if found:
			print("token groups == ", found.groups())
			print("t.end(0) == ", found.end(0))

#		t = TeXdollar.match(repeat)
#		s = beginmath_or_comment.match(repeat)
#		u = endmath_or_comment.match(repeat)
#		if t:
#			print("TeXdollar groups")
#			print(t.group(1))
#			print(t.groups())
#		if s:
#			print("beginmath groups")
#			print(s.group(1))
#			print(s.groups())
#		if u:
#			print("endmath groups")
#			print(u.group(1))
#			print(u.groups())

