Urgh... Finally got it to work. Here it is. @Codeholic Great job, especially for the user interface.
I'm not good at naming the params, and the -i option especially seems bad (as it has nothing to do with the original sed -i). Any naming suggestions?
EDIT: changed the param names, and added a little functionality, but the code became ugly...
Code: Select all
#!/usr/bin/python
from argparse import ArgumentParser, FileType
import re
import sys
# Any line starting with '#' is metadata rather than pattern rows.
RE_META = re.compile(r'#')
# A '#P <x> <y>' metadata line: sets the position of the rows that follow.
RE_POSITION = re.compile(r'#P[\t\040]+([-0-9]+)[\t\040]+([-0-9]+)')


def parse_pattern(patfile):
    """Read a pattern from an iterable of text lines.

    Lines starting with '#' are metadata and never contribute cells; a
    '#P x y' line moves the origin so that the next row starts at column
    x, row y.  Every other line is one row of cells.  Spaces are skipped
    (they still occupy a column); every other character, '.' included,
    is stored.

    Returns a dict mapping (x, y) coordinates to the cell character.
    """
    cells = {}
    origin_x = 0
    row = 0
    for raw in patfile:
        text = raw.rstrip()
        if RE_META.match(text) is not None:
            position = RE_POSITION.match(text)
            if position is not None:
                origin_x = int(position.group(1))
                row = int(position.group(2))
            # Metadata lines do not advance the row counter.
            continue
        for offset, symbol in enumerate(text):
            if symbol != ' ':
                cells[origin_x + offset, row] = symbol
        row += 1
    return cells
def get_pattern_bounding_box(pattern):
    """Return (xmin, ymin, xmax, ymax) over all coordinates of *pattern*.

    *pattern* maps (x, y) tuples to cell characters; every key counts,
    whatever its cell value.  Raises ValueError for an empty pattern.
    """
    columns = [x for x, _ in pattern]
    rows = [y for _, y in pattern]
    return min(columns), min(rows), max(columns), max(rows)
def linearize_pattern(pattern):
    """Serialize *pattern* into a single-line, '!'-terminated row string.

    The pattern is cropped to its bounding box; cells missing from the
    dict are written as '.', trailing '.' characters of each row are
    dropped, and every row (including the last) is followed by '!'.
    The absolute position of the pattern is not preserved.

    Raises ValueError (from the bounding-box computation) when *pattern*
    is empty.
    """
    xmin, ymin, xmax, ymax = get_pattern_bounding_box(pattern)
    width = xmax - xmin + 1
    height = ymax - ymin + 1
    grid = [['.'] * width for _ in range(height)]
    # .items() instead of .iteritems(): works on both Python 2 and 3.
    for (x, y), cell in pattern.items():
        grid[y - ymin][x - xmin] = cell
    return '!'.join(''.join(row).rstrip('.') for row in grid) + '!'
def strip_empty_cells(pattern):
    """Return a copy of *pattern* without the cells whose value is '.'.

    *pattern* maps (x, y) tuples to cell characters; the input dict is
    left untouched.
    """
    # .items() instead of .iteritems(): works on both Python 2 and 3.
    return {coords: cell for coords, cell in pattern.items() if cell != '.'}
def replace_subpattern(needle, replacement, haystack, g, n, ne):
    """Replace occurrences of *needle* in *haystack* by *replacement*.

    All three patterns are dicts mapping (x, y) to a cell character,
    where '.' denotes an empty cell.  *replacement* uses the same
    coordinate system as *needle*.

    Parameters:
        g  -- replace every occurrence instead of only the first one found.
        n  -- return None (instead of the haystack) when nothing matched.
        ne -- ignore the empty cells of *needle* and *replacement*: empty
              needle cells match anything and empty replacement cells
              leave the haystack untouched.

    Returns None when the haystack has no non-empty cell, when the needle
    has no non-empty cell, or when nothing matched and *n* is set.  When
    nothing matched and *n* is not set, the haystack (without its empty
    cells) is returned.  Otherwise a new dict holding the haystack with
    the substitutions applied is returned; none of the arguments is
    modified.
    """
    if ne:
        needle = strip_empty_cells(needle)
        replacement = strip_empty_cells(replacement)
    # The haystack is always reduced to its non-empty cells, so that an
    # explicitly stored '.' and a missing key both mean "empty" below.
    haystack = strip_empty_cells(haystack)
    if not haystack:
        return None

    # Any non-empty needle cell can serve as the anchor: it must land on
    # a non-empty haystack cell, so only those need to be tried.
    pivot = next((xy for xy, c in needle.items() if c != '.'), None)
    if pivot is None:
        return None
    pivot_x, pivot_y = pivot

    # Collect the translations (dx, dy) at which the needle fits.
    offsets = []
    for x, y in haystack:
        dx, dy = x - pivot_x, y - pivot_y
        if all(haystack.get((i + dx, j + dy), '.') == c
               for (i, j), c in needle.items()):
            offsets.append((dx, dy))
            if not g:
                break

    if not offsets:
        return None if n else haystack

    result = dict(haystack)
    # First erase every matched needle, then draw every replacement, so
    # that one match's erasure cannot wipe another match's replacement.
    for dx, dy in offsets:
        for (i, j), c in needle.items():
            if c != '.':
                result.pop((i + dx, j + dy), None)
    for dx, dy in offsets:
        for (i, j), c in replacement.items():
            target = (i + dx, j + dy)
            if c == '.':
                result.pop(target, None)
            else:
                result[target] = c
    return result
# Command-line interface: read the match and substitution patterns from
# files, then filter stdin line by line.
parser = ArgumentParser(description='Two-dimensional match/replace.')
parser.add_argument('matchfile', help='pattern to match')
parser.add_argument('substfile', help='substitution pattern')
parser.add_argument('-F', type=int, default=0, metavar='field', help='field to modify (default=0)')
parser.add_argument('-g', action='store_true', help='replace all occurrences')
parser.add_argument('-n', action='store_true', help='output successful matches only')
parser.add_argument('-ne', action='store_true', help='only match non-empty cells')
args = parser.parse_args()

with open(args.matchfile, 'r') as needle_file:
    needle = parse_pattern(needle_file)
with open(args.substfile, 'r') as replacement_file:
    replacement = parse_pattern(replacement_file)

# Each input line is a list of space-separated fields; field number -F
# holds a pattern written as '!'-separated rows.
for line in sys.stdin:
    meta = line.rstrip().split(' ')
    pattern = meta.pop(args.F)
    result = replace_subpattern(needle, replacement, parse_pattern(pattern.split('!')),
                                args.g, args.n, args.ne)
    # Lines without a (non-empty) result are dropped from the output.
    if result:
        meta.insert(args.F, linearize_pattern(result))
        # Parenthesized single-argument form prints identically on
        # Python 2 and Python 3.
        print(' '.join(meta))
Invoke `sed2d.py -h` for instructions.
EDIT: code prettified at the cost of some functionality, which I consider unimportant.