Thank you for the explanation, rowett.
I have tried to cut down the dependency of my script but a test set would definitely be helpful.
1. The canonlise() function is not fully implemented yet. Currently no minus sign is allowed.
2. The 102 configurations are yet to be projected back to the original 512 configs. ( preferable as a separate set that deals with bitstrings explicitly, since 102-bitstring is equally legitimate as the 512 one )
Code: Select all
test_alias = 'B13-ce2kS2e'
identity = lambda x:x
base2bin=lambda data,scale,num_of_bits: bin(int(data, scale))[2:].zfill(num_of_bits);
hex2bin=lambda hexdata,num_of_bits: base2bin(hexdata,16,num_of_bits);
bin2hex = lambda bitstr:hex(int(bitstr,2)).lstrip('0x').rstrip('L')
import itertools, collections, re
count = collections.Counter
# import numpy as np
# import copy
# import random
# from astropy.convolution import convolve
# convolve_int=lambda a,fir,method:np.around(convolve(a,fir,method)).astype(np.int);
# import scipy.ndimage
# convolve_int=lambda a,fir,method,**kwargs:scipy.ndimage.filters.convolve(a,fir,mode = method,**kwargs)
hensellist=['b0_','b1c','b1e','b2a','b2c','b3i','b2e','b3a','b2k','b3n','b3j','b4a','s0_','s1c','s1e','s2a','s2c','s3i','s2e','s3a','s2k','s3n','s3j','s4a','b2i','b3r','b3e','b4r','b4i','b5i','s2i','s3r','s3e','s4r','s4i','s5i','b2n','b3c','b3q','b4n','b4w','b5a','s2n','s3c','s3q','s4n','s4w','s5a','b3y','b3k','b4k','b4y','b4q','b5j','b4t','b4j','b5n','b4z','b5r','b5q','b6a','s3y','s3k','s4k','s4y','s4q','s5j','s4t','s4j','s5n','s4z','s5r','s5q','s6a','b4e','b5c','b5y','b6c','s4e','s5c','s5y','s6c','b5k','b6k','b6n','b7c','s5k','s6k','s6n','s7c','b4c','b5e','b6e','s4c','s5e','s6e','b6i','b7e','s6i','s7e','b8_','s8_',];
henselidx={k: v for v, k in enumerate(hensellist)}
rca2ntca=[0, 1, 2, 3, 1, 4, 3, 5, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 13, 16, 15, 17, 14, 15, 18, 19, 20, 21, 22, 23, 2, 8, 6, 10, 3, 9, 7, 11, 24, 25, 26, 27, 25, 28, 27, 29, 14, 20, 18, 22, 15, 21, 19, 23, 30, 31, 32, 33, 31, 34, 33, 35, 1, 4, 8, 9, 36, 37, 38, 39, 3, 5, 10, 11, 38, 39, 40, 41, 13, 16, 20, 21, 42, 43, 44, 45, 15, 17, 22, 23, 44, 45, 46, 47, 8, 48, 49, 50, 38, 51, 52, 53, 25, 54, 55, 56, 57, 58, 59, 60, 20, 61, 62, 63, 44, 64, 65, 66, 31, 67, 68, 69, 70, 71, 72, 73, 2, 8, 24, 25, 8, 48, 25, 54, 6, 10, 26, 27, 49, 50, 55, 56, 14, 20, 30, 31, 20, 61, 31, 67, 18, 22, 32, 33, 62, 63, 68, 69, 6, 49, 26, 55, 10, 50, 27, 56, 26, 55, 74, 75, 55, 76, 75, 77, 18, 62, 32, 68, 22, 63, 33, 69, 32, 68, 78, 79, 68, 80, 79, 81, 3, 9, 25, 28, 38, 51, 57, 58, 7, 11, 27, 29, 52, 53, 59, 60, 15, 21, 31, 34, 44, 64, 70, 71, 19, 23, 33, 35, 65, 66, 72, 73, 10, 50, 55, 76, 40, 82, 59, 83, 27, 56, 75, 77, 59, 83, 84, 85, 22, 63, 68, 80, 46, 86, 72, 87, 33, 69, 79, 81, 72, 87, 88, 89, 1, 36, 8, 38, 4, 37, 9, 39, 8, 38, 49, 52, 48, 51, 50, 53, 13, 42, 20, 44, 16, 43, 21, 45, 20, 44, 62, 65, 61, 64, 63, 66, 3, 38, 10, 40, 5, 39, 11, 41, 25, 57, 55, 59, 54, 58, 56, 60, 15, 44, 22, 46, 17, 45, 23, 47, 31, 70, 68, 72, 67, 71, 69, 73, 4, 37, 48, 51, 37, 90, 51, 91, 9, 39, 50, 53, 51, 91, 82, 92, 16, 43, 61, 64, 43, 93, 64, 94, 21, 45, 63, 66, 64, 94, 86, 95, 9, 51, 50, 82, 39, 91, 53, 92, 28, 58, 76, 83, 58, 96, 83, 97, 21, 64, 63, 86, 45, 94, 66, 95, 34, 71, 80, 87, 71, 98, 87, 99, 3, 38, 25, 57, 9, 51, 28, 58, 10, 40, 55, 59, 50, 82, 76, 83, 15, 44, 31, 70, 21, 64, 34, 71, 22, 46, 68, 72, 63, 86, 80, 87, 7, 52, 27, 59, 11, 53, 29, 60, 27, 59, 75, 84, 56, 83, 77, 85, 19, 65, 33, 72, 23, 66, 35, 73, 33, 72, 79, 88, 69, 87, 81, 89, 5, 39, 54, 58, 39, 91, 58, 96, 11, 41, 56, 60, 53, 92, 83, 97, 17, 45, 67, 71, 45, 94, 71, 98, 23, 47, 69, 73, 66, 95, 87, 99, 11, 53, 56, 83, 41, 92, 60, 97, 29, 60, 77, 85, 60, 97, 85, 100, 23, 66, 69, 87, 47, 95, 73, 99, 35, 73, 81, 89, 73, 99, 89, 101];
# rca2ntca=np.array(rca2ntca,np.int);
#### Obsolete ####
henseldict={'0': ['_'],
'1': ['c', 'e'],
'2': ['a', 'c', 'e', 'k', 'i', 'n'],
'3': ['i', 'a', 'n', 'j', 'r', 'e', 'c', 'q', 'y', 'k'],
'4': ['a', 'r', 'i', 'n', 'w', 'k', 'y', 'q', 't', 'j', 'z', 'e', 'c'],
'5': ['i', 'a', 'j', 'n', 'r', 'q', 'c', 'y', 'k', 'e'],
'6': ['a', 'c', 'k', 'n', 'e', 'i'],
'7': ['c', 'e'],
'8': ['_']}
#### New tool ###
hensel2ntca = {"b":
{"0": {"_": 0},
"1": {"c": 1, "e": 2},
"2": {"a": 3, "c": 4, "e": 6, "k": 8, "i": 24, "n": 36},
"3": {"i": 5, "a": 7, "n": 9, "j": 10, "r": 25, "e": 26, "c": 37, "q": 38, "y": 48, "k": 49},
"4": {"a": 11, "r": 27, "i": 28, "n": 39, "w": 40, "k": 50, "y": 51, "q": 52, "t": 54, "j": 55, "z": 57, "e": 74, "c": 90},
"5": {"i": 29, "a": 41, "j": 53, "n": 56, "r": 58, "q": 59, "c": 75, "y": 76, "k": 82, "e": 91},
"6": {"a": 60, "c": 77, "k": 83, "n": 84, "e": 92, "i": 96},
"7": {"c": 85, "e": 97},
"8": {"_": 100}
},
"s": {"0": {"_": 12},
"1": {"c": 13, "e": 14},
"2": {"a": 15, "c": 16, "e": 18, "k": 20, "i": 30, "n": 42},
"3": {"i": 17, "a": 19, "n": 21, "j": 22, "r": 31, "e": 32, "c": 43, "q": 44, "y": 61, "k": 62},
"4": {"a": 23, "r": 33, "i": 34, "n": 45, "w": 46, "k": 63, "y": 64, "q": 65, "t": 67, "j": 68, "z": 70, "e": 78, "c": 93},
"5": {"i": 35, "a": 47, "j": 66, "n": 69, "r": 71, "q": 72, "c": 79, "y": 80, "k": 86, "e": 94},
"6": {"a": 73, "c": 81, "k": 87, "n": 88, "e": 95, "i": 98},
"7": {"c": 89, "e": 99},
"8": {"_": 101}
}
}
subconf='_cekainyqjrtwz';
p_NOTnumletter = re.compile(r'[^\da-zA-Z]')
# try:
# from data import *
# with open('tp','rb') as f: # Python 3: open(..., 'rb')
# hdist, tst_data = pickle.load(f)
# hdist = np.array(hdist).reshape([512,512]);
# except:
# print('[WARN]Not finding data.py')
def invert(s):
'''
Invert a configuration according to hensel dict
(TBC) To be modified to include "-" sign explicitly
'''
num = s[0]
conf = henseldict[num]
return ''.join([num]+[x for x in conf if x not in s])
def padsplit(s,pattern):
'''
Pad a string after index before split
Strip padding after split
'''
# pattern = '([bs])'
# s = test_alias.lower()
s = re.sub( pattern, '\\1 ',s)
lst = re.split( pattern,s)[1:]
return [x.strip() for x in lst]
def ntuple(lst,n):
"""ntuple([0,3,4,10,2,3], 2) => [(0,3), (4,10), (2,3)]
Group a list into consecutive n-tuples. Incomplete tuples are
discarded e.g.
>>> group(range(10), 3)
[(0, 1, 2), (3, 4, 5), (6, 7, 8)]
Source: http://code.activestate.com/recipes/303060-group-a-list-into-sequential-n-tuples/
"""
return zip(*[lst[i::n] for i in range(n)])
class kb_2dntca():
def __init__(self):
self.familyname='2dntca'
pass
def rulestr2alias(self, rulestr):
OUT = ''
# rulestr = '000000000060031c61c67f86a0'
r=hex2bin(rulestr,102);
r=r[::-1];
rule=[i for i,x in enumerate(r) if x=='1'];
# print r
lst = [hensellist[i] for i in rule]
lst.sort()
d = collections.OrderedDict((('b',{}),('s',{}))) ### set default
#### group by B/S
d.update(
{k:list(gp) for k,gp in itertools.groupby(lst, lambda x:x[0])}
)
#### group by number
for k,lst in d.items():
d[k] = {k:list(gp) for k,gp in itertools.groupby(lst, lambda x:x[1])}
for bs, dd in d.items():
OUT += bs
for k,lst in dd.items():
OUT += k + ''.join( conf[2] for conf in lst)
OUT = OUT.replace('_','')
alias = OUT
return alias
def alias2rulestr(self,alias):
### Remove any "-" sign
alias = re.sub('(\d-[a-zA-Z]+)',lambda o:invert(o.group()),alias)
#### Remove suffix
alias = alias.split(':')[0]
##### Remove things like _
alias = p_NOTnumletter.sub( '', alias).lower()
OUT = [0]*102
#### Actual conversion
idxs = []
bsdict = dict(ntuple(padsplit(alias.lower(),'([bs])'),2))
for bs, val in bsdict.items():
numdict = dict(ntuple(padsplit(val,'(\d)'),2))
for num,letters in numdict.items():
candidate = hensel2ntca.get(bs).get(num)
if len(letters)==0:
idxs += candidate.values()
else:
idxs += [candidate.get(let) for let in letters]
for i in idxs:
OUT[i] = 1-OUT[i]
bitstr=''.join(map(str,OUT[::-1]));
hexstr = bin2hex(bitstr).zfill(26)
return hexstr
def canonlise(self,alias):
alias = self.rulestr2alias(
self.alias2rulestr(alias)
)
return alias
pass
kb = kb_2dntca()
print test_alias
print kb.canonlise(test_alias)