Sophie

Sophie

distrib > Mageia > 6 > x86_64 > media > core-release > by-pkgid > 0d4b03f6c024599542f359647823b03c > files > 35

python-egenix-mx-base-doc-3.2.9-2.mga6.noarch.rpm

#!/usr/bin/python

""" RegExp - tag a string using regexps (Version 0.1)
    
    Copyright (c) 2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    Copyright (c) 2000-2015, eGenix.com Software GmbH; mailto:info@egenix.com
    See the documentation for further information on copyrights,
    or contact the author. All Rights Reserved.
"""

# engine + constants
from mx.TextTools import *

# special matching function
def match_regexp(text,position,len_text,regexpr):
    """ Matching function for use with the CallArg tagging command.

        Tries to match the compiled regular expression regexpr
        against text, starting at index position and not reading
        beyond index len_text.

        Returns the index in text following the matched substring.
        If the expression does not match (or only matches the empty
        string), position is returned unchanged; according to the
        mx.TextTools documentation the tagging engine treats an
        unchanged position as a failed match.
    """
    match = regexpr.match(text,position,len_text)
    if match is None:
        # no match: report "no progress" instead of raising an
        # AttributeError on None
        return position
    # match.end() is already an absolute index into text, so it must
    # not be added to position again
    return match.end()

# create a table in the sense re_1\|re_2\|...\|re_N where
# re_i \in regexps
def or_regexps(regexps):
    """ Build a tag table that tries the given regexps one after
        another.

        regexps is an iterable of compiled regular expressions. Each
        one becomes a CallArg entry that calls match_regexp with the
        expression as argument and uses the expression's index as
        tag object. On failure an entry jumps to the next entry (+1),
        on success it jumps to MatchOk. A final Fail entry is reached
        only when none of the expressions matched.

        Returns the table as a tuple of entries.
    """
    entries = [(index,CallArg,(match_regexp,regexp),+1,MatchOk)
               for index,regexp in enumerate(regexps)]
    # none of the alternatives matched
    entries.append((None,Fail,Here))
    return tuple(entries)

if __name__ == '__main__':

    # create some simple regexps
    import re
    regexps = [ 'spam*', 'ham*', 'eggs' ]
    regexps = map(re.compile, regexps)
    table = or_regexps(regexps)

    text = raw_input('type some words: ')
    result, taglist, nextindex = tag(text,table)

    if result:
        print 'subexpr nr.',taglist[0][0],'matched:',taglist[0]
    else:
        print 'no match'

    if nextindex < len(text): 
        print 'rest of unparsed input:',text[nextindex:]