To attend:
https://zchg.org/t/web-4-0-roadmap/436
Base4096 Deterministic
import math
from functools import lru_cache
# Binary operator table: symbol -> function of two integers.
# '/', '%' and '^' return None instead of raising when the operation is
# undefined (zero divisor, negative exponent).
def _floor_div(a, b):
    if b != 0:
        return a // b
    return None


def _modulo(a, b):
    if b != 0:
        return a % b
    return None


def _power(a, b):
    if b >= 0:
        return a ** b
    return None


# Insertion order matters: the expression builder iterates this dict.
binary_ops = {
    '+': lambda a, b: a + b,
    '-': lambda a, b: a - b,
    '*': lambda a, b: a * b,
    '/': _floor_div,
    '%': _modulo,
    '^': _power,
    '&': lambda a, b: a & b,
    '|': lambda a, b: a | b,
}
def _bitwise_not(a):
    """Return the bitwise complement of ``a``."""
    return ~a


# Unary operator table: symbol -> one-argument function.
unary_ops = {'~': _bitwise_not}
# AST node types
class Expr:
    """Abstract base for expression-tree nodes."""

    def eval(self):
        """Return the node's value; subclasses must override."""
        raise NotImplementedError

    def serialize(self):
        """Return the node's textual form; subclasses must override."""
        raise NotImplementedError
class Literal(Expr):
    """Leaf node holding a constant value."""

    def __init__(self, value):
        self.value = value

    def eval(self):
        """Return the stored value unchanged."""
        return self.value

    def serialize(self):
        """Return ``'L'`` followed by the formatted value."""
        return "L{}".format(self.value)
class UnaryOp(Expr):
    """Node applying one operator from ``unary_ops`` to a sub-expression."""

    def __init__(self, op, expr):
        self.op = op
        self.expr = expr

    def eval(self):
        """Evaluate the operand, then apply the operator looked up by symbol."""
        handler = unary_ops[self.op]
        return handler(self.expr.eval())

    def serialize(self):
        """Return ``'U'``, the operator symbol, then the serialized operand."""
        return "U{}{}".format(self.op, self.expr.serialize())
class BinaryOp(Expr):
    """Node applying one operator from ``binary_ops`` to two sub-expressions."""

    def __init__(self, op, left, right):
        self.op = op
        self.left = left
        self.right = right

    def eval(self):
        """Evaluate left then right, and combine them with the operator."""
        handler = binary_ops[self.op]
        return handler(self.left.eval(), self.right.eval())

    def serialize(self):
        """Return ``'O'``, the operator symbol, then both serialized operands."""
        return "O{}{}{}".format(
            self.op, self.left.serialize(), self.right.serialize()
        )
# Optimized expression builder with depth control
@lru_cache(maxsize=None)
def build_expr(n, max_depth=4):
    """Return the shortest-serializing expression found that evaluates to ``n``.

    The search starts from ``Literal(n)`` and only replaces it with a
    unary/binary candidate whose value is ``n`` and whose serialized text is
    strictly shorter.

    Args:
        n: Integer to express.
        max_depth: Remaining nesting budget; sub-expressions get one less.

    Returns:
        An ``Expr`` evaluating to ``n``, or ``None`` when ``max_depth`` is 0
        or ``n`` is negative.
    """
    if max_depth == 0 or n < 0:
        return None

    best = Literal(n)
    best_len = len(best.serialize())
    # Only operator-level failures are skipped; a bare ``except`` would also
    # swallow KeyboardInterrupt/SystemExit.
    skippable = (ArithmeticError, TypeError, ValueError, MemoryError)

    # Try unary operators
    for op, fn in unary_ops.items():
        try:
            operand = fn(n)
        except skippable:
            continue
        subexpr = build_expr(operand, max_depth - 1)
        if not subexpr:
            continue
        cand = UnaryOp(op, subexpr)
        if cand.eval() == n:
            cand_len = len(cand.serialize())
            if cand_len < best_len:
                best, best_len = cand, cand_len

    # Try binary operations on every split n = a + (n - a)
    for op, fn in binary_ops.items():
        for a in range(1, n + 1):
            try:
                value = fn(a, n - a)
            except skippable:
                continue
            # Sub-expressions built for ``a`` and ``n - a`` evaluate to exactly
            # those numbers, so the candidate's value is ``value``. Anything
            # else (including None) can never be accepted, so skip it before
            # paying for the recursive builds.
            if value is None or value != n:
                continue
            left = build_expr(a, max_depth - 1)
            right = build_expr(n - a, max_depth - 1)
            if not (left and right):
                continue
            cand = BinaryOp(op, left, right)
            cand_len = len(cand.serialize())
            if cand_len < best_len:
                best, best_len = cand, cand_len
    return best
# Encoding and decoding interface
def encode(n):
    """Serialize the expression built for ``n``; ``None`` if none was built."""
    tree = build_expr(n)
    if tree:
        return tree.serialize()
    return None
def decode(s):
    """Parse a serialized expression and return its evaluated value.

    Grammar (prefix notation, no separators):
        ``L<digits>``   literal (an optional leading ``-`` is accepted)
        ``U<op><expr>`` unary operation
        ``O<op><expr><expr>`` binary operation

    Text after the first complete expression is ignored.

    Raises:
        ValueError: on an unknown token, a literal without digits, or input
            that ends in the middle of an expression.
    """
    size = len(s)

    def parse(index):
        if index >= size:
            raise ValueError("Unexpected end of input")
        token = s[index]
        if token == 'L':
            i = index + 1
            # Literal.serialize() writes negative values as "L-5".
            if i < size and s[i] == '-':
                i += 1
            first_digit = i
            while i < size and s[i].isdigit():
                i += 1
            if i == first_digit:
                raise ValueError(f"Literal at position {index} has no digits")
            return Literal(int(s[index + 1:i])), i
        if token in ('U', 'O'):
            if index + 1 >= size:
                raise ValueError("Unexpected end of input")
            op = s[index + 1]
            first, i = parse(index + 2)
            if token == 'U':
                return UnaryOp(op, first), i
            second, j = parse(i)
            return BinaryOp(op, first, second), j
        raise ValueError("Invalid token")

    expr, _ = parse(0)
    return expr.eval()
# Example
if __name__ == '__main__':
    # Round-trip the integers 1..19 and print each stage.
    for value in range(1, 20):
        text = encode(value)
        restored = decode(text)
        print(f"{value} => {text} => {restored}")
Expression Alphabet
import hashlib
import operator
from typing import Union, Callable, List, Optional, Dict, Tuple
import itertools
import math
# --- Define atomic value types ---
Expr = Union[int, str, 'Node']
class Node:
    """Operator application: an operator name plus its operand expressions."""

    def __init__(self, op_name: str, operands: "List[Expr]", depth: int = 1):
        self.op_name = op_name
        self.operands = operands
        self.depth = depth

    def __repr__(self):
        rendered = [repr(operand) for operand in self.operands]
        return "{}({})".format(self.op_name, ', '.join(rendered))
# --- Meta-grammar for operator evolution ---
# Primitive two-argument operators, looked up by name.
BASE_OPERATORS = dict(
    add=operator.add,
    mul=operator.mul,
    xor=operator.xor,
    pow=pow,
)
# Composite operators registered at runtime: name -> callable.
DERIVED_OPERATORS: Dict[str, Callable] = dict()
# Names of registered operators grouped by the depth they were registered at.
DEPTH_OPERATORS: Dict[int, List[str]] = dict()
# Register hybrid or depth-aware operators
def register_operator(name: str, func: Callable, depth: int):
    """Register a derived operator and index its name under ``depth``.

    Registration is idempotent: a name that is already a base or derived
    operator is left untouched, and a name is never listed twice for the same
    depth (so calling the generator again does not duplicate candidates).
    """
    if name in BASE_OPERATORS or name in DERIVED_OPERATORS:
        return
    DERIVED_OPERATORS[name] = func
    names_at_depth = DEPTH_OPERATORS.setdefault(depth, [])
    if name not in names_at_depth:
        names_at_depth.append(name)
# Generate derived operators up to a given depth
def generate_derived_operators(max_depth=3):
    """Register every chain of 2..``max_depth`` base operators.

    A chain named ``a_b_c`` folds its operators left to right over the same
    right-hand operand: ``c(b(a(x, y), y), y)``.
    """
    def chain(op_names):
        # op_names is bound per call, so each closure keeps its own sequence.
        def combo(a, b):
            acc = a
            for op_name in op_names:
                acc = BASE_OPERATORS[op_name](acc, b)
            return acc
        return combo

    for depth in range(2, max_depth + 1):
        for op_names in itertools.product(BASE_OPERATORS, repeat=depth):
            register_operator('_'.join(op_names), chain(op_names), depth)
# --- Evaluate any expression node or literal ---
def evaluate(expr: "Expr") -> int:
    """Reduce an expression to a value.

    * ``int``  -> returned unchanged.
    * ``str``  -> SHA-256 of the text, reduced modulo 4096.
    * ``Node`` -> its operator (base table first, then derived) applied to the
      evaluated operands.

    Raises:
        ValueError: the node names an unregistered operator.
        TypeError: ``expr`` is none of the supported kinds.
    """
    if isinstance(expr, int):
        return expr
    if isinstance(expr, str):
        digest = hashlib.sha256(expr.encode()).hexdigest()
        return int(digest, 16) % 4096
    if not isinstance(expr, Node):
        raise TypeError(f"Unsupported expr: {expr}")
    handler = BASE_OPERATORS.get(expr.op_name) or DERIVED_OPERATORS.get(expr.op_name)
    if handler is None:
        raise ValueError(f"Unknown operator: {expr.op_name}")
    values = [evaluate(operand) for operand in expr.operands]
    return handler(*values)
# --- Grammar-guided expression generator ---
def generate_expressions(depth: int = 2) -> List[Node]:
    """Enumerate every binary node over the digits 0-9, level by level.

    Level ``d`` pairs every expression known so far (literals plus all earlier
    levels) under each base operator and each operator registered for ``d``.
    The pool is squared at every level, so the result grows extremely fast.
    """
    pool: List[Expr] = list(range(10))
    produced = []
    level = 1
    while level <= depth:
        op_names = [*BASE_OPERATORS.keys(), *DEPTH_OPERATORS.get(level, [])]
        fresh = [
            Node(op_name, [lhs, rhs], depth=level)
            for op_name in op_names
            for lhs, rhs in itertools.product(pool, repeat=2)
        ]
        produced += fresh
        pool += fresh
        level += 1
    return produced
# --- Match expression to a target string hash ---
def find_expression_for_string(target: str, max_depth: int = 3) -> Optional[Node]:
    """Return the first generated expression whose value equals the target hash.

    The target is SHA-256(``target``) modulo 4096. Candidates are visited in
    the same order ``generate_expressions`` produces them (level by level,
    operator by operator, operand pairs in product order), but each one is
    tested as soon as it is created. The search therefore stops at the first
    hit instead of first materialising the whole candidate list, whose size is
    squared at every level.

    Returns:
        The matching ``Node``, or ``None`` if no candidate up to ``max_depth``
        matches.
    """
    target_hash = int(hashlib.sha256(target.encode()).hexdigest(), 16) % 4096
    pool: List[Expr] = list(range(0, 10))
    for d in range(1, max_depth + 1):
        ops = list(BASE_OPERATORS.keys()) + DEPTH_OPERATORS.get(d, [])
        level_nodes = []
        for op in ops:
            for a, b in itertools.product(pool, repeat=2):
                node = Node(op, [a, b], depth=d)
                level_nodes.append(node)
                try:
                    if evaluate(node) == target_hash:
                        return node
                except Exception:
                    # Best effort: a candidate that cannot be evaluated is
                    # simply not a match.
                    continue
        # Nodes of this level become operands only for the next level.
        pool.extend(level_nodes)
    return None
# --- Canonical encoder/decoder ---
def encode_string_as_expression(s: str, max_depth: int = 3) -> Node:
    """Return an expression matching the hash of ``s``.

    Raises:
        ValueError: no matching expression exists within ``max_depth``.
    """
    found = find_expression_for_string(s, max_depth)
    if found is not None:
        return found
    raise ValueError("Could not encode string within given depth")
def decode_expression(expr: Node) -> str:
    """Return a placeholder naming the value ``expr`` evaluates to.

    Only the hash value is recoverable; the original text is not.
    """
    return "string_with_hash_{}".format(evaluate(expr))
# --- Full document encoding ---
def encode_document(text: str, max_depth: int = 3) -> List[Node]:
    """Encode each line of ``text`` separately.

    A line that cannot be encoded contributes ``None`` at its position, so the
    result has one entry per line.
    """
    encoded = []
    for line in text.splitlines():
        try:
            entry = encode_string_as_expression(line, max_depth)
        except ValueError:
            entry = None
        encoded.append(entry)
    return encoded
def decode_document(expressions: List[Optional[Node]]) -> List[str]:
    """Decode each entry, writing ``"UNENCODABLE"`` for falsy (missing) ones."""
    decoded = []
    for entry in expressions:
        if entry:
            decoded.append(decode_expression(entry))
        else:
            decoded.append("UNENCODABLE")
    return decoded
# --- Recursive assembler for higher-level encoding ---
def assemble_expressions(expressions: List[Node], max_depth: int = 4) -> Optional[Node]:
    """Encode a list of expressions as one higher-level expression.

    The ``repr`` of every expression is joined with ``'|'`` and the resulting
    string is encoded like any other string.
    """
    combined = '|'.join(map(repr, expressions))
    return find_expression_for_string(combined, max_depth=max_depth)
# --- Expand system ---
# Populate the derived-operator registry at import time.
generate_derived_operators(max_depth=4)

# --- Example usage ---
if __name__ == "__main__":
    doc = """
In the beginning God created the heavens and the earth.
And the earth was without form, and void; and darkness was upon the face of the deep.
And the Spirit of God moved upon the face of the waters.
"""
    expressions = encode_document(doc, max_depth=3)
    print("-- Level 1 Expressions --")
    for line_expr in expressions:
        print("Expr:", line_expr)
        if line_expr:
            print("Decoded:", decode_expression(line_expr))
        else:
            print("Decoded:", "Failed")

    # Only successfully encoded lines take part in the second level.
    encodable = [line_expr for line_expr in expressions if line_expr]
    top_expr = assemble_expressions(encodable, max_depth=4)
    print("\n-- Level 2 Unified Expression --")
    print("Expr:", top_expr)
    if top_expr:
        print("Decoded:", decode_expression(top_expr))
    else:
        print("Decoded:", "Failed")
Deterministic Encoder
import math
import hashlib
from functools import lru_cache
# --- Stage 1: Alphabet Growth Estimator ---
# Define operator set with their arity (number of arguments)
# Operator symbol -> arity; -1 marks a variadic operator.
# Insertion order matters: operators are selected by position.
OPERATORS = dict([
    ('+', 2),
    ('*', 2),
    ('^', 2),
    ('-', 2),
    ('/', 2),
    ('%', 2),
    ('neg', 1),
    ('sqrt', 1),
    ('log', 1),
    ('exp', 1),
    ('max', 2),
    ('min', 2),
    ('sum', -1),   # variable arity
    ('prod', -1),  # variable arity
])
# Estimate number of expressions possible at given depth
@lru_cache(maxsize=None)
def count_expressions(depth):
    """Estimate how many expressions can be formed at ``depth``.

    Depth 0 counts as a single base literal. At each further depth an operator
    of arity ``k`` contributes ``count(depth - 1) ** k``; a variadic operator
    is assumed to take 2 or 3 arguments and contributes the number of
    multisets of that size.
    """
    if depth == 0:
        return 1
    smaller = count_expressions(depth - 1)
    variadic = sum(math.comb(smaller + size - 1, size) for size in range(2, 4))
    return sum(
        variadic if arity == -1 else smaller ** arity
        for arity in OPERATORS.values()
    )
# --- Stage 2: Approximate-Index Expression Grammar ---
def string_to_fingerprint(s):
    """Return the SHA-512 digest of ``s`` as a big-endian integer."""
    digest = hashlib.sha512(s.encode()).digest()
    return int.from_bytes(digest, 'big')
def build_expression_from_index(index, max_depth):
    """Deterministically derive an expression string from a numeric index.

    Placeholder implementation (to be replaced by a full grammar walk): the
    operator is chosen by ``index`` modulo the operator count, variadic
    operators are treated as binary, and each child index is a further
    quotient of ``index`` reduced modulo 997. At depth 0 the result is the
    single digit ``index % 10``.
    """
    if max_depth == 0:
        return f"{index % 10}"  # just a leaf literal

    names = list(OPERATORS.keys())
    base = len(names)
    op = names[index % base]
    arity = OPERATORS[op]
    if arity == -1:
        arity = 2  # simplify: variadic operators take two arguments here

    rendered = []
    for position in range(arity):
        child_index = (index // (base ** (position + 1))) % 997
        rendered.append(build_expression_from_index(child_index, max_depth - 1))
    return f"({op} {' '.join(rendered)})"
# --- Stage 3: Resolver ---
def encode_string(s, max_depth=5):
    """Map ``s`` to an expression string derived from its fingerprint."""
    return build_expression_from_index(string_to_fingerprint(s), max_depth)
# --- Stage 4: Inverse Hash Decoder (Approximate) ---
def decode_expression_to_fingerprint(expr):
    """Not implemented: the fingerprint hash cannot be reversed.

    Raises:
        NotImplementedError: always.
    """
    message = (
        "Perfect reverse decoding is not feasible via SHA512. "
        "Consider storing a lookup table or reversible grammar."
    )
    raise NotImplementedError(message)
if __name__ == '__main__':
    test_str = "HELLO WORLD, THIS IS A TEST OF THE RECURSIVE ENCODER!"
    # Size of the expression space at depth 5, then one sample encoding.
    est_size = count_expressions(5)
    print(f"Total expressible characters (depth 5): {est_size}")
    print("Encoded expression:")
    encoded_text = encode_string(test_str)
    print(encoded_text)
Recursive Codec Roadmap
# Recursive Codec Full Roadmap Implementation
# Step-by-step expansion of deterministic nested operator grammar codec.
# Author: Josef Kulovany - ZCHG.org
from typing import Union, List
import operator
# -------------------
# 1. Core Expression Data Structure
# -------------------
class ExprChar:
    """Expression node: an operator symbol applied to a list of operands."""

    def __init__(self, op: str, operands: List[Union[int, 'ExprChar']]):
        self.op = op
        self.operands = operands

    def __repr__(self):
        rendered = ', '.join(str(operand) for operand in self.operands)
        return "{}({})".format(self.op, rendered)
# -------------------
# 2. Grammar and Operator Set Expansion
# -------------------
def _floordiv_or_one(a, b):
    """Floor division that yields 1 for a zero divisor."""
    if b != 0:
        return a // b
    return 1


def _mod_or_zero(a, b):
    """Modulo that yields 0 for a zero divisor."""
    if b != 0:
        return a % b
    return 0


# Operator symbol -> two-argument function.
basic_ops = {
    '+': operator.add,
    '*': operator.mul,
    '^': operator.pow,
    '-': operator.sub,
    '/': _floordiv_or_one,
    '%': _mod_or_zero,
}
# Future: Extend with logical, bitwise, and hybrid operators
# -------------------
# 3. Evaluation
# -------------------
def evaluate(expr: Union[int, ExprChar]) -> int:
    """Evaluate an expression tree.

    Integers evaluate to themselves. A node evaluates its operands and folds
    them left to right with its operator, so a single-operand node yields that
    operand unchanged.
    """
    if isinstance(expr, int):
        return expr
    apply = basic_ops[expr.op]
    values = [evaluate(operand) for operand in expr.operands]
    total = values[0]
    for value in values[1:]:
        total = apply(total, value)
    return total
# -------------------
# 4. Encoding Integer to Expression Tree (Depth-Limited)
# -------------------
def encode_int(n: int, depth: int = 2) -> ExprChar:
    """Encode ``n`` as an expression tree of at most ``depth`` operator levels.

    For each operator (in the order ``* + ^ - / %``) and each ``a`` in
    ``1..n-1`` a partner ``b`` is inferred; the first pair that really
    evaluates to ``n`` is used and both halves are encoded recursively.
    ``n < 2`` or an exhausted depth produce the leaf ``+[n]``.

    NOTE(review): the first accepted pair appears to always be ``1 * n``, so
    the resulting trees are degenerate -- confirm whether ``a`` was meant to
    start at 2.
    """
    if depth == 0 or n < 2:
        return ExprChar('+', [n])
    for op in ['*', '+', '^', '-', '/', '%']:
        for a in range(1, n):
            try:
                b = infer_b(n, a, op)
                matches = b is not None and evaluate(ExprChar(op, [a, b])) == n
            except (ArithmeticError, TypeError, ValueError):
                # Only arithmetic/type failures mean "not a usable pair"; a
                # bare ``except`` would also hide KeyboardInterrupt.
                continue
            if matches:
                return ExprChar(op, [encode_int(a, depth - 1), encode_int(b, depth - 1)])
    return ExprChar('+', [n])
def infer_b(result, a, op):
    """Guess the operand ``b`` for which ``a <op> b`` could equal ``result``.

    The guess is a candidate only; callers verify it by evaluating the
    expression. Returns ``None`` when no candidate can be computed or ``op``
    is not recognised.
    """
    # Imported here because this script's header does not import ``math``;
    # previously the resulting NameError was hidden by a bare ``except`` and
    # the '^' case always returned None.
    import math

    try:
        if op == '+':
            return result - a
        if op == '*':
            return result // a if a != 0 else None
        if op == '^':
            return int(round(math.log(result, a)))
        if op == '-':
            return a - result
        if op == '/':
            return a // result if result != 0 else None
        if op == '%':
            return result if a != 0 else None
    except (ArithmeticError, TypeError, ValueError):
        # e.g. log of a non-positive number, or base 1 (division by zero).
        return None
    return None
# -------------------
# 5. Serialization / Deserialization (Text Format)
# -------------------
def serialize(expr):
    """Render an expression as text: integers as digits, nodes as ``op[a,b,...]``."""
    if isinstance(expr, int):
        return str(expr)
    rendered = [serialize(operand) for operand in expr.operands]
    return "{}[{}]".format(expr.op, ','.join(rendered))
def deserialize(s: str):
    """Parse text of the form ``op[a,b,...]`` (or a bare integer) into a tree.

    Integer literals may carry a leading ``-``, because integers are written
    with ``str()`` on the serializing side and may therefore be negative.

    Raises:
        ValueError: the text is neither an integer nor ``<op>[...]``.
    """
    digits = s[1:] if s.startswith('-') else s
    if digits.isdigit():
        return int(s)
    open_at = s.find('[')
    if open_at < 1 or not s.endswith(']'):
        raise ValueError(f"Malformed expression: {s!r}")
    op = s[0]
    inner = s[open_at + 1:-1]
    return ExprChar(op, [deserialize(part) for part in split_expr(inner)])
def split_expr(s):
    """Split ``s`` on commas that are not nested inside square brackets.

    Empty pieces between two top-level commas are kept; an empty trailing
    piece is dropped.
    """
    pieces = []
    buffer = []
    level = 0
    for ch in s:
        if ch == '[':
            level += 1
        elif ch == ']':
            level -= 1
        if ch == ',' and level == 0:
            pieces.append(''.join(buffer))
            buffer = []
        else:
            buffer.append(ch)
    if buffer:
        pieces.append(''.join(buffer))
    return pieces
# -------------------
# 6. String Encoding via Word Frequency -> Integer Mapping
# -------------------
def string_to_freq_map(s):
    """Fold the character counts of ``s`` into one integer.

    Distinct characters are visited in sorted order; each step multiplies the
    running total by 256 and adds ``ord(char) * count``. Character order in
    ``s`` does not affect the result.
    """
    from collections import Counter

    total = 0
    for ch, times in sorted(Counter(s).items()):
        total = total * 256 + ord(ch) * times
    return total
# -------------------
# 7. Full Encode/Decode Pipeline
# -------------------
def encode_string(s: str, depth=3) -> str:
    """Encode ``s``: frequency integer -> expression tree -> text."""
    number = string_to_freq_map(s)
    return serialize(encode_int(number, depth))
def decode_string(encoded: str) -> int:
    """Parse ``encoded`` and return the integer its tree evaluates to.

    The result is the frequency integer, not the original string.
    """
    return evaluate(deserialize(encoded))
# -------------------
# 8. Next Up: Nested Compression Layers
# -------------------
# - Encode each encoded expression as its own character via symbolic alphabet
# - Layer deeper (ExprChar of ExprChar of ExprChar...)
# - Full fingerprint and reconstruction path
# Status: layering and visual tooling are planned but not yet implemented.
Recursive Codec Pipeline
Recursive Codec Pipeline with Layered Encoding
Overview
This pipeline deterministically encodes any string into a unique symbolic representation using a grammar of nested operators. It supports recursion by building higher-order expressions from encoded symbols of smaller segments.
1. Core Components
Alphabet and Operators
# Base symbol set: upper-case letters, lower-case letters, then digits.
basic_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
# Operator symbols an expression node may carry, selected by index.
operators = ['+', '*', '^', '~', '@', '#', '&', '|', '%', '!', ':', '?']
Expression Node (Recursive Expression Grammar)
class ExprNode:
    """Recursive expression: an operator applied to child values or nodes."""

    def __init__(self, op, children):
        self.op = op
        self.children = children

    def __str__(self):
        rendered = ', '.join(map(str, self.children))
        return "({} {})".format(self.op, rendered)

    def evaluate(self):
        """Evaluate the children, then combine them according to ``op``.

        ``+`` sums, ``*`` multiplies, ``^`` raises the first of exactly two
        values to the second. Any other operator evaluates to 0.
        """
        operands = []
        for child in self.children:
            if isinstance(child, ExprNode):
                operands.append(child.evaluate())
            else:
                operands.append(child)

        op = self.op
        if op == '+':
            return sum(operands)
        if op == '*':
            product = 1
            for operand in operands:
                product *= operand
            return product
        if op == '^':
            base, exp = operands
            return base ** exp
        # Extend with additional logic per operator as needed
        return 0
2. Encoding Functions
String to Integer via Positional Weighting (sum of ord(c) * len(s)**i; not a cryptographic hash)
def string_to_integer(s):
    """Return the sum of ``ord(char) * len(s) ** position`` over ``s``."""
    radix = len(s)
    return sum(ord(ch) * (radix ** position) for position, ch in enumerate(s))
Integer to Expression Tree
def int_to_expr(n, depth=0, max_depth=3):
    """Expand ``n`` into a binary expression tree.

    Values below the alphabet size, or reached at ``max_depth``, stay plain
    integers. Otherwise the operator is picked by ``n`` modulo the operator
    count and the children are built from ``n // 2`` and ``n // 3``.
    """
    is_leaf = depth >= max_depth or n < len(basic_alphabet)
    if is_leaf:
        return n
    symbol = operators[n % len(operators)]
    children = [
        int_to_expr(n // divisor, depth + 1, max_depth)
        for divisor in (2, 3)
    ]
    return ExprNode(symbol, children)
Expression to Symbol Mapping (Placeholder)
def expr_to_symbol(expr):
    """Return a placeholder symbol ``<HEX>`` identifying ``expr``.

    The id is the first 24 bits of the SHA-256 digest of ``str(expr)``,
    written in upper-case hex without padding. The built-in ``hash()`` cannot
    be used here: string hashes are salted per interpreter process, so the
    symbol would change from run to run.
    """
    import hashlib

    digest = hashlib.sha256(str(expr).encode("utf-8")).digest()
    symbol_id = int.from_bytes(digest[:3], "big")
    return f"<{symbol_id:X}>"  # Simulate unique character ID
Full Encode
def encode_string(s):
    """Encode ``s``: string -> integer -> expression -> symbol."""
    return expr_to_symbol(int_to_expr(string_to_integer(s)))
3. Recursive Layering
Layer Text into Blocks
def layer_text(text, block_size=16):
    """Encode ``text`` in two layers.

    The text is cut into blocks of ``block_size`` characters, every block is
    encoded to a symbol, and the concatenated symbols are encoded once more.
    """
    symbols = []
    for start in range(0, len(text), block_size):
        symbols.append(encode_string(text[start:start + block_size]))
    return encode_string(''.join(symbols))
4. Decoding (Prototype Stage)
Symbol to Expression (Inverse Mapping)
def symbol_to_expr(symbol):
    """Placeholder inverse of ``expr_to_symbol``.

    Ignores ``symbol`` and always returns the fixed expression ``(+ 1, 2)``;
    a full version would use a deterministic grammar.
    """
    placeholder_children = [1, 2]
    return ExprNode('+', placeholder_children)
Evaluate Expression to Recover Integer
def expr_to_int(expr):
    """Evaluate ``expr`` if it is a node; return it unchanged otherwise."""
    if isinstance(expr, ExprNode):
        return expr.evaluate()
    return expr
Integer to Approximate String (Stub)
def int_to_string(n):
    """Stub: return the marker text ``String(n)`` rather than a real string.

    A full version would need a compression dictionary to reconstruct text.
    """
    return "String({})".format(n)
Full Decode (Stub)
def decode_symbol(symbol):
    """Stub decode: symbol -> expression -> integer -> marker string."""
    return int_to_string(expr_to_int(symbol_to_expr(symbol)))
5. Example Usage
# Encode a sample sentence in two layers and show the resulting symbol.
text = "HELLO WORLD, THIS IS A TEST OF THE RECURSIVE ENCODER!"
symbol = layer_text(text)
print("Encoded as symbol:", symbol)
Expression Codec
import math
from typing import Union, Dict, Tuple
# Expression Node: either a plain integer or an (operator, left, right) tuple.
type Expr = Union[int, Tuple[str, 'Expr', 'Expr']]
# Global symbol dictionary for encoding/decoding
# expr_to_symbol maps an expression's string form to its assigned character;
# symbol_to_expr is the reverse mapping (character -> expression string).
expr_to_symbol: Dict[str, str] = {}
symbol_to_expr: Dict[str, str] = {}
# Next code point to hand out; incremented each time a new symbol is created.
symbol_base = 0xE000 # Start of Private Use Area
# Operators in precedence order (lower index = higher precedence)
# Each entry pairs the operator's symbol with its two-argument implementation.
OPERATORS = [
('^', lambda a, b: a ** b),
('*', lambda a, b: a * b),
('+', lambda a, b: a + b)
]
# Encode an expression as a string
def expr_to_str(expr: "Expr") -> str:
    """Render an expression as fully parenthesised infix text."""
    if isinstance(expr, int):
        return str(expr)
    op, left, right = expr
    return "(" + expr_to_str(left) + format(op) + expr_to_str(right) + ")"
# Recursively evaluate an expression
def eval_expr(expr: "Expr") -> int:
    """Recursively evaluate an expression tuple.

    Raises:
        ValueError: the operator is not listed in ``OPERATORS``.
    """
    if isinstance(expr, int):
        return expr
    op, left, right = expr
    # First entry whose symbol matches wins.
    handler = next((func for symbol, func in OPERATORS if symbol == op), None)
    if handler is None:
        raise ValueError(f"Unknown operator: {op}")
    return handler(eval_expr(left), eval_expr(right))
# Generate a unique character for each expression string
def get_symbol(expr: "Expr") -> str:
    """Return the character assigned to ``expr``, allocating one on first use.

    Characters are handed out sequentially from ``symbol_base``.

    Raises:
        OverflowError: the next code point would lie beyond U+F8FF.
    """
    global symbol_base
    key = expr_to_str(expr)
    try:
        return expr_to_symbol[key]
    except KeyError:
        pass
    if symbol_base > 0xF8FF:
        raise OverflowError("PUA exhausted")
    fresh = chr(symbol_base)
    expr_to_symbol[key] = fresh
    symbol_to_expr[fresh] = key
    symbol_base += 1
    return fresh
# Decompose integer into minimal expression using precedence
# This can be improved with heuristics and memoization.
def _integer_root(n: int, k: int) -> int:
    """Return floor(n ** (1/k)) for n >= 1 using exact integer arithmetic."""
    low, high = 1, 1 << (n.bit_length() // k + 1)
    while low < high:
        mid = (low + high + 1) // 2
        if mid ** k <= n:
            low = mid
        else:
            high = mid - 1
    return low


def decompose(n: int) -> "Expr":
    """Decompose ``n`` into a nested ``(op, left, right)`` expression.

    Values below 10 are returned unchanged. Otherwise, in order of preference:
    a perfect power ``a ** b`` with the smallest exponent ``b >= 2``; a
    product using the smallest divisor ``b >= 2``; finally ``(n - 1) + 1``.

    Roots are computed with integer arithmetic, so perfect powers are detected
    exactly for integers of any size (float roots lose precision beyond 2**53
    and overflow for very large values). The divisor search stops at
    ``isqrt(n)``: the smallest non-trivial divisor of a composite number never
    exceeds its square root, so the result is the same as scanning to
    ``n // 2``.
    """
    if n < 10:
        return n
    # Heuristic: prioritize exponentiation
    for b in range(2, n.bit_length() + 1):
        a = _integer_root(n, b)
        if a < 2:
            break  # larger exponents can only give root 1
        if a ** b == n:
            return ('^', decompose(a), decompose(b))
    for b in range(2, math.isqrt(n) + 1):
        if n % b == 0:
            return ('*', decompose(n // b), decompose(b))
    return ('+', decompose(n - 1), 1)
# Encode a string as a symbol sequence using expression encoding
def encode_string(s: str) -> str:
    """Encode ``s`` as the symbol of its decomposed character-code total.

    The total is each distinct character's code point times its number of
    occurrences, which equals the plain sum of all code points in ``s``.
    """
    total = sum(ord(ch) for ch in s)
    return get_symbol(decompose(total))
# Decode a symbol back to its expression and evaluate
def decode_symbol(symbol: str) -> int:
    """Look up the expression stored for ``symbol`` and return its value.

    The stored text is the fully parenthesised form ``(left op right)`` over
    integers and the operators ``^`` (power), ``*`` and ``+``. It is parsed
    here rather than passed to ``eval``: Python reads ``^`` as XOR, so
    ``"(4^2)"`` would evaluate to 6 instead of 16, and ``eval`` would execute
    whatever text the dictionary holds.

    Raises:
        ValueError: unknown symbol, or stored text that is not a well-formed
            expression.
    """
    expr_str = symbol_to_expr.get(symbol)
    if not expr_str:
        raise ValueError("Unknown symbol")

    apply = {
        '^': lambda a, b: a ** b,
        '*': lambda a, b: a * b,
        '+': lambda a, b: a + b,
    }
    size = len(expr_str)
    pos = 0

    def parse() -> int:
        nonlocal pos
        if pos >= size:
            raise ValueError("Unexpected end of expression")
        if expr_str[pos] == '(':
            pos += 1
            left = parse()
            if pos >= size or expr_str[pos] not in apply:
                raise ValueError(f"Expected operator at position {pos}")
            op = expr_str[pos]
            pos += 1
            right = parse()
            if pos >= size or expr_str[pos] != ')':
                raise ValueError(f"Expected ')' at position {pos}")
            pos += 1
            return apply[op](left, right)
        start = pos
        if expr_str[pos] == '-':  # integers are written with str()
            pos += 1
        first_digit = pos
        while pos < size and expr_str[pos].isdigit():
            pos += 1
        if pos == first_digit:
            raise ValueError(f"Expected integer at position {start}")
        return int(expr_str[start:pos])

    value = parse()
    if pos != size:
        raise ValueError(f"Unexpected trailing text at position {pos}")
    return value
if __name__ == "__main__":
    # Encode one sample string, then decode the resulting symbol.
    s = "HELLO WORLD"
    encoded = encode_string(s)
    decoded_value = decode_symbol(encoded)
    print(f"Original: {s}")
    print(f"Encoded symbol: {encoded}")
    print(f"Decoded (raw eval value): {decoded_value}")
Expression Codec Pipeline
# === expression_codec_pipeline.py ===
import hashlib
from collections import defaultdict
# === Expression Node Class ===
class ExprNode:
    """Tree node: a leaf carries ``value``; an inner node carries ``op`` and children."""

    def __init__(self, op=None, children=None, value=None):
        self.op = op  # Operator: '+', '*', '^', 'sum', etc.
        self.children = children or []
        self.value = value  # For leaves

    def is_leaf(self):
        """A node without an operator is a leaf."""
        return self.op is None

    def serialize(self):
        """Return the leaf value, or ``(op child child ...)`` for inner nodes."""
        if self.is_leaf():
            return self.value
        parts = [child.serialize() for child in self.children]
        return "({} {})".format(self.op, ' '.join(parts))

    def hash(self):
        """Return a SHA-256 hex digest identifying this subtree.

        Leaves hash their value; inner nodes hash the operator followed by the
        concatenated child digests.
        """
        if self.is_leaf():
            material = self.value
        else:
            material = self.op + ''.join(child.hash() for child in self.children)
        return hashlib.sha256(material.encode()).hexdigest()
# === Parser ===
def parse_expr(s):
    """Parse ``(op child child ...)`` text into a tree.

    Text that does not start with ``(`` becomes a leaf holding the stripped
    text. Children are separated by spaces outside nested parentheses.
    """
    text = s.strip()
    if not text.startswith('('):
        return ExprNode(value=text)

    body = text[1:-1].strip()
    op, rest = body.split(' ', 1)

    children = []
    pending = []
    nesting = 0
    for ch in rest:
        if ch == '(':
            nesting += 1
        elif ch == ')':
            nesting -= 1
        if ch == ' ' and nesting == 0:
            # A top-level space ends the current child, if one is pending.
            if pending:
                children.append(parse_expr(''.join(pending)))
                pending = []
        else:
            pending.append(ch)
    if pending:
        children.append(parse_expr(''.join(pending)))
    return ExprNode(op=op, children=children)
# === Tree Construction ===
def build_balanced_tree(s, op='+'):
    """Build a balanced binary tree whose leaves are the characters of ``s``.

    The string is halved recursively; every inner node carries ``op``. A
    string of length 0 or 1 becomes a single leaf holding it (an empty string
    previously recursed without end).
    """
    if len(s) <= 1:
        return ExprNode(value=s)
    mid = len(s) // 2
    left = build_balanced_tree(s[:mid], op)
    right = build_balanced_tree(s[mid:], op)
    return ExprNode(op=op, children=[left, right])
# === Compression / Subtree Reuse ===
def compress_tree(root):
    """Replace repeated subtrees with ``REF_<hash prefix>`` leaves, in place.

    The tree is walked depth first. The first node seen for a digest is kept
    and recorded; every later node with the same digest is replaced by a leaf
    named after the first six hex characters of that digest.

    Returns:
        ``(root, cache)`` where ``cache`` maps each digest to its first node.
    """
    seen = {}

    def visit(node):
        digest = node.hash()
        if digest in seen:
            return ExprNode(value=f"REF_{digest[:6]}")
        seen[digest] = node
        if node.is_leaf():
            return node
        node.children = [visit(child) for child in node.children]
        return node

    new_root = visit(root)
    return new_root, seen
# === Operator Precedence Extension ===
def build_tree_with_ops(s, ops=('+', '*', '^')):
    """Build a balanced tree over ``s`` with a length-dependent operator.

    Each inner node uses ``ops[len(segment) % len(ops)]`` for the segment it
    covers. A string of length 0 or 1 becomes a single leaf (an empty string
    previously recursed without end). ``ops`` may be any indexable sequence;
    the default is a tuple so that no mutable object is shared between calls.
    """
    if len(s) <= 1:
        return ExprNode(value=s)
    op = ops[len(s) % len(ops)]  # Rotate ops deterministically
    mid = len(s) // 2
    left = build_tree_with_ops(s[:mid], ops)
    right = build_tree_with_ops(s[mid:], ops)
    return ExprNode(op=op, children=[left, right])
# === Symbol Dictionary ===
class SymbolDictionary:
    """Two-way table between expressions (keyed by hash) and ``S<n>`` symbols."""

    def __init__(self):
        self.expr_to_symbol = {}
        self.symbol_to_expr = {}
        self.counter = 0

    def get_symbol(self, expr):
        """Return the symbol for ``expr``, allocating the next ``S<n>`` if new."""
        digest = expr.hash()
        known = self.expr_to_symbol
        if digest not in known:
            fresh = "S{}".format(self.counter)
            self.counter += 1
            known[digest] = fresh
            self.symbol_to_expr[fresh] = expr
        return known[digest]

    def get_expr(self, symbol):
        """Return the expression stored for ``symbol``, or ``None``."""
        return self.symbol_to_expr.get(symbol)
# === Full Encode / Decode ===
def encode(string, ops=('+', '*', '^')):
    """Encode ``string`` as the serialized, compressed operator tree.

    An empty string encodes to an empty string (building a tree for it used
    to recurse without end). ``ops`` defaults to a tuple so that no mutable
    object is shared between calls.

    NOTE(review): repeated subtrees are serialized as ``REF_...`` placeholders
    and the table needed to resolve them is discarded here, so it looks like
    input with repeated parts cannot be restored from this text alone --
    confirm the intended decoding path.
    """
    if not string:
        return ""
    tree = build_tree_with_ops(string, ops)
    compressed, _ = compress_tree(tree)
    return compressed.serialize()
def decode(expr_str):
    """Parse ``expr_str`` and concatenate its leaf values from left to right."""
    def flatten(node):
        if node.is_leaf():
            return node.value
        pieces = [flatten(child) for child in node.children]
        return ''.join(pieces)

    return flatten(parse_expr(expr_str))
# === Example Usage ===
if __name__ == "__main__":
    # Encode a sample string, then decode the produced expression text.
    test_str = "HELLOWORLD"
    encoded = encode(test_str)
    print("Encoded Expression:", encoded)
    print("Decoded String:", decode(encoded))
Recursive Encoder #1
import hashlib
import itertools
import json
from collections import defaultdict
# --- Symbol Dictionary ---
class SymbolDictionary:
    """Assigns short symbols to JSON-serializable expressions.

    The first symbols come from a fixed character list; once that list is
    used up, symbols take the form ``@<counter>``.
    """

    def __init__(self):
        self.symbol_counter = 0
        self.symbol_to_expr = {}
        self.expr_to_symbol = {}
        self.base_symbols = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!@#$%^&*()")

    def get_symbol(self, expr):
        """Return the symbol for ``expr``, allocating a new one on first use."""
        key = self._fingerprint(expr)
        try:
            return self.expr_to_symbol[key]
        except KeyError:
            pass
        fresh = self._next_symbol()
        self.symbol_to_expr[fresh] = expr
        self.expr_to_symbol[key] = fresh
        return fresh

    def _next_symbol(self):
        """Hand out the next unused symbol and advance the counter."""
        index = self.symbol_counter
        self.symbol_counter = index + 1
        if index < len(self.base_symbols):
            return self.base_symbols[index]
        return f"@{index}"

    def _fingerprint(self, expr):
        """Return the SHA-256 hex digest of the key-sorted JSON form of ``expr``."""
        canonical = json.dumps(expr, sort_keys=True)
        return hashlib.sha256(canonical.encode()).hexdigest()
# --- Tokenizer ---
def tokenize(text):
    """Split ``text`` into a list of its individual items (characters for a str)."""
    return [item for item in text]
# --- Operator Grammar ---
class Operator:
    """Named operator: a label, a declared arity and the implementing callable."""

    def __init__(self, name, arity, func):
        self.name = name
        self.arity = arity
        self.func = func

    def apply(self, args):
        """Call the operator with ``args`` unpacked as positional arguments."""
        handler = self.func
        return handler(*args)
# Define simple example operators
def _bounded_pow(a, b):
    """Power that yields 1 once the exponent reaches 10."""
    if b < 10:
        return a ** b
    return 1


# Example operators; list position is used to pick an operator by index.
OPERATORS = [
    Operator("ADD", 2, lambda a, b: a + b),
    Operator("MUL", 2, lambda a, b: a * b),
    Operator("XOR", 2, lambda a, b: a ^ b),
    Operator("SHF", 2, lambda a, b: a << b),
    Operator("POW", 2, _bounded_pow),
]
# --- Expression Compression ---
def compress_tokens(tokens, symdict):
    """Reduce ``tokens`` to a single symbol by repeated pairwise merging.

    Every token is first registered as ``{"val": token}``. Each round merges
    neighbouring symbols into ``{"op": ..., "args": [left, right]}`` entries
    until one symbol remains. With an odd count the last symbol is carried
    into the next round unchanged: wrapping it as ``{"val": symbol}`` made the
    decoder return the symbol character instead of the original token.

    An empty token list yields the symbol of the empty value ``{"val": ""}``.
    """
    leaves = [symdict.get_symbol({"val": t}) for t in tokens]
    if not leaves:
        return symdict.get_symbol({"val": ""})
    while len(leaves) > 1:
        merged = []
        for i in range(0, len(leaves), 2):
            if i + 1 < len(leaves):
                op = OPERATORS[i % len(OPERATORS)]
                expr = {"op": op.name, "args": [leaves[i], leaves[i + 1]]}
                merged.append(symdict.get_symbol(expr))
            else:
                merged.append(leaves[i])
        leaves = merged
    return leaves[0]
# --- Decoder ---
def decode_symbol(sym, symdict):
    """Expand ``sym`` back into text using ``symdict.symbol_to_expr``.

    An unknown symbol (or one mapped to an empty entry) is returned as is. A
    ``val`` entry yields its value; an ``op`` entry yields the concatenation
    of its decoded arguments; anything else yields ``"?"``.
    """
    entry = symdict.symbol_to_expr.get(sym)
    if not entry:
        return sym
    if "val" in entry:
        return entry["val"]
    if "op" not in entry:
        return "?"
    return ''.join(decode_symbol(arg, symdict) for arg in entry["args"])
# --- Full Encode/Decode API ---
def encode(text):
    """Compress ``text`` into one symbol.

    Returns:
        ``(symbol, dictionary)``; the dictionary is required for decoding.
    """
    symdict = SymbolDictionary()
    final_symbol = compress_tokens(tokenize(text), symdict)
    return final_symbol, symdict
def decode(symbol, symdict):
    """Expand ``symbol`` back into text with the dictionary from ``encode``."""
    restored = decode_symbol(symbol, symdict)
    return restored
# --- Example Usage ---
if __name__ == "__main__":
    # Round-trip a sample string through the symbol dictionary.
    text = "HELLO WORLD"
    encoded, symdict = encode(text)
    print("Encoded symbol:", encoded)
    print("Decoded string:", decode(encoded, symdict))
Recursive Operator Grammar
# Enhanced operator pattern detection
# + Recursive grammar integration scaffolding
from typing import List
from collections import Counter
import re
# Expression Node
class ExprNode:
    """Hashable expression node; a node without arguments is a leaf."""

    def __init__(self, op, *args):
        self.op = op
        self.args = args

    def __eq__(self, other):
        if not isinstance(other, ExprNode):
            return False
        return self.op == other.op and self.args == other.args

    def __hash__(self):
        identity = (self.op, self.args)
        return hash(identity)

    def __repr__(self):
        if self.args:
            rendered = ', '.join(repr(arg) for arg in self.args)
            return f"{self.op}({rendered})"
        return f"'{self.op}'"
# Detect patterns and choose operators
def detect_patterns(s: str) -> "ExprNode":
    """Describe ``s`` as an operator tree, preferring compact patterns.

    Checks, in order:
      1. empty or single character -> a plain leaf;
      2. one character repeated    -> ``sum(char, count)``;
      3. a repeated unit           -> ``power(unit, count)``;
      4. digits joined by ``+``    -> ``add_chain(part, ...)``;
      5. otherwise                 -> ``concat`` of the two halves.

    The leaf and ``sum`` checks come first: previously an empty string
    recursed without end, a single character was reported as
    ``sum(char, 1)``, and a run such as ``"aaaa"`` never reached the ``sum``
    rule because the repetition rule matched it first.

    NOTE(review): ``add_chain`` evaluates to the arithmetic sum, so input like
    ``"1+2"`` is not reproduced by evaluation -- confirm this is intended.
    """
    n = len(s)
    if n <= 1:
        return ExprNode(s)
    # Detect addition of same character: e.g. "aaaa" => sum('a', 4)
    if len(set(s)) == 1:
        return ExprNode('sum', ExprNode(s[0]), ExprNode(str(n)))
    # Detect perfect repetition: e.g. "abcabcabc" => power(<abc>, 3)
    for unit_len in range(1, n // 2 + 1):
        if n % unit_len == 0:
            repeat_unit = s[:unit_len]
            if repeat_unit * (n // unit_len) == s:
                return ExprNode(
                    'power',
                    detect_patterns(repeat_unit),
                    ExprNode(str(n // unit_len)),
                )
    # Detect basic arithmetic numeric strings (optional)
    if re.fullmatch(r"\d+(\+\d+)+", s):
        parts = s.split('+')
        return ExprNode('add_chain', *(ExprNode(p) for p in parts))
    # Default to concat tree
    mid = n // 2
    return ExprNode('concat', detect_patterns(s[:mid]), detect_patterns(s[mid:]))
# Basic grammar scaffolding for recursive operator expansion
class OperatorGrammar:
    """Evaluates operator trees back into strings."""

    def __init__(self):
        # Each rule receives the already-evaluated argument strings.
        self.rules = {
            'concat': lambda args: ''.join(args),
            'sum': lambda args: args[0] * int(args[1]),
            'power': lambda args: args[0] * int(args[1]),
            'add_chain': lambda args: str(sum(map(int, args))),
        }

    def evaluate(self, expr: "ExprNode") -> str:
        """Return the string ``expr`` stands for.

        A node without arguments evaluates to its own label; any other node
        evaluates its arguments first and then applies the rule named by its
        label.
        """
        if not expr.args:
            return expr.op
        rule = self.rules[expr.op]
        return rule([self.evaluate(arg) for arg in expr.args])
# Example usage
if __name__ == '__main__':
    # Detect the structure of a sample string and check it round-trips.
    input_str = "abcabcabc"
    grammar = OperatorGrammar()
    expr = detect_patterns(input_str)
    print("Detected expression:", expr)
    reconstructed = grammar.evaluate(expr)
    print("Reconstructed:", reconstructed)
    assert reconstructed == input_str
Recursive Encoder #2
import hashlib
import json
import string
from typing import Union, Dict, List
# === Base Operators ===
# Two-argument arithmetic operators, keyed by name.
OPERATORS = dict(
    ADD=lambda x, y: x + y,
    MUL=lambda x, y: x * y,
    POW=lambda x, y: x ** y,
)


# === Derived Operators and Patterns ===
def _digit_count(x):
    """Length of ``str(x)``, but at least 1."""
    return max(len(str(x)), 1)


def _mirror(x):
    """Integer formed by reversing ``str(x)``."""
    return int(str(x)[::-1])


def _digit_sum(x):
    """Sum of the decimal digits in ``str(x)``."""
    return sum(int(d) for d in str(x))


# One-argument operators, keyed by name.
DERIVED_OPERATORS = {
    'MAX_DEPTH': _digit_count,
    'MIRROR': _mirror,
    'FOLD': _digit_sum,
}
# === Tokenizer ===
def tokenize(text: str) -> List[str]:
    """Split ``text`` into whitespace-separated chunks."""
    # could be improved with NLP chunking
    chunks = text.split()
    return chunks
# === Expression Node ===
class ExprNode:
    """Operator node whose children are nested nodes or plain values."""

    def __init__(self, op, children):
        self.op = op
        self.children = children

    def evaluate(self):
        """Evaluate the node.

        Operators from ``OPERATORS`` receive all evaluated children; operators
        from ``DERIVED_OPERATORS`` receive only the first. An operator found
        in neither table evaluates to ``None``.
        """
        def value_of(child):
            return child.evaluate() if isinstance(child, ExprNode) else child

        if self.op in OPERATORS:
            return OPERATORS[self.op](*(value_of(c) for c in self.children))
        if self.op in DERIVED_OPERATORS:
            return DERIVED_OPERATORS[self.op](value_of(self.children[0]))
        return None

    def to_dict(self):
        """Return the nested ``{'op', 'children'}`` dictionary form."""
        converted = []
        for child in self.children:
            converted.append(child.to_dict() if isinstance(child, ExprNode) else child)
        return {'op': self.op, 'children': converted}

    def to_string(self):
        """Return the dictionary form as JSON with sorted keys."""
        return json.dumps(self.to_dict(), sort_keys=True)
# === Symbol Dictionary ===
class SymbolDictionary:
    """Two-way table between expression JSON text and ``@SYM<n>`` symbols."""

    def __init__(self):
        self.expr_to_symbol: Dict[str, str] = {}
        self.symbol_to_expr: Dict[str, str] = {}
        self.counter = 0

    def _generate_symbol(self):
        """Return the next unused ``@SYM<n>`` and advance the counter."""
        index = self.counter
        self.counter = index + 1
        return f"@SYM{index}"

    def get_symbol(self, expr: "ExprNode") -> str:
        """Return the symbol for ``expr``, allocating one on first use."""
        key = expr.to_string()
        existing = self.expr_to_symbol.get(key)
        if existing is not None:
            return existing
        fresh = self._generate_symbol()
        self.expr_to_symbol[key] = fresh
        self.symbol_to_expr[fresh] = key
        return fresh

    def get_expression(self, symbol: str) -> "ExprNode":
        """Rebuild the expression stored for ``symbol``.

        Raises:
            ValueError: ``symbol`` was never issued by this dictionary.
        """
        if symbol not in self.symbol_to_expr:
            raise ValueError(f"Unknown symbol: {symbol}")
        stored = self.symbol_to_expr[symbol]
        return parse_expression(json.loads(stored))
# === Parser ===
def parse_expression(data: Union[dict, int]) -> "ExprNode":
    """Rebuild an expression from its decoded JSON form.

    Dictionaries become nodes (children parsed recursively). Any other value
    is a leaf and is returned unchanged: integers, but also strings, because
    a node's children may be symbol references such as ``"@SYM0"``. Indexing
    such a string with ``'op'`` used to raise ``TypeError``.
    """
    if not isinstance(data, dict):
        return data
    op = data['op']
    children = [parse_expression(child) for child in data['children']]
    return ExprNode(op, children)
# === Expression Encoder ===
def encode_chunk(chunk: str, symbols: SymbolDictionary) -> str:
    """Register ``FOLD(MUL(total, 1))`` for ``chunk`` and return its symbol.

    ``total`` is the sum of the chunk's character codes.
    """
    total = sum(map(ord, chunk))
    product = ExprNode('MUL', [total, 1])
    return symbols.get_symbol(ExprNode('FOLD', [product]))
# === High-Level Compression ===
def compress_text(text: str, symbols: SymbolDictionary) -> str:
    """Encode every chunk of ``text`` and combine the symbols under one ``ADD``.

    Returns the symbol registered for that combined expression.
    """
    chunk_syms = [encode_chunk(chunk, symbols) for chunk in tokenize(text)]
    combined = ExprNode('ADD', list(chunk_syms))
    return symbols.get_symbol(combined)
# === Decompression ===
def decompress_symbol(symbol: str, symbols: SymbolDictionary) -> str:
    """Look up ``symbol`` in ``symbols`` and render the stored expression."""
    return reconstruct_string(symbols.get_expression(symbol))
def reconstruct_string(expr: "Union[ExprNode, str]", symbol_dict: "Optional[SymbolDictionary]" = None) -> str:
    """Render an expression (or a symbol naming one) as text.

    Args:
        expr: A node, or a symbol string to resolve first.
        symbol_dict: Dictionary used to resolve symbols. When omitted, the
            module-level ``symbols`` instance is used, as before; passing it
            explicitly removes the dependency on that global.

    ``ADD`` nodes join their rendered children with spaces. ``FOLD`` nodes
    take the first child of their inner node (or their own first child) as a
    number ``v`` and render ``chr(v % 256)`` repeated ``v // 256`` times.
    Everything else is rendered with ``str``.
    """
    registry = symbols if symbol_dict is None else symbol_dict
    if isinstance(expr, str):
        expr = registry.get_expression(expr)
    if isinstance(expr, ExprNode):
        if expr.op == 'ADD':
            return ' '.join(reconstruct_string(c, registry) for c in expr.children)
        elif expr.op == 'FOLD':
            first = expr.children[0]
            val = first.children[0] if isinstance(first, ExprNode) else first
            return chr(val % 256) * (val // 256)
    return str(expr)
# === Example Run ===
# Module-level demo: compress a sentence, then expand the resulting symbol.
symbols = SymbolDictionary()
input_text = "HELLO WORLD THIS IS A TEST"
final_symbol = compress_text(input_text, symbols)
print("Final Symbol:", final_symbol)
decompressed_text = decompress_symbol(final_symbol, symbols)
print("Decompressed:", decompressed_text)
Recursive Encoder Expanded
import hashlib
import json
import threading
from collections import defaultdict
# --- Symbol Dictionary (Persistent, Safe) ---
class SymbolDictionary:
    """Lock-protected two-way table between expressions and symbols.

    Symbols are single characters allocated sequentially from U+E000.
    Expressions are identified by the string returned from their ``hash()``
    method.
    """

    _FIRST_CODE_POINT = 0xE000
    _LAST_CODE_POINT = 0xF8FF  # end of the BMP Private Use Area

    def __init__(self):
        self.lock = threading.Lock()
        self.symbol_to_expr = {}
        self.expr_to_symbol = {}
        self.next_id = 0

    def _generate_symbol(self):
        """Return the next unused character.

        The code point is computed with ``chr``; the former literal
        ``f"\\uE{...}"`` is a truncated ``\\uXXXX`` escape and does not compile.

        Raises:
            OverflowError: all code points up to U+F8FF have been used.
        """
        code_point = self._FIRST_CODE_POINT + self.next_id
        if code_point > self._LAST_CODE_POINT:
            raise OverflowError("Private Use Area exhausted")
        self.next_id += 1
        return chr(code_point)

    def get_symbol(self, expr):
        """Return the symbol for ``expr``, allocating one on first use."""
        key = expr.hash()
        with self.lock:
            if key in self.expr_to_symbol:
                return self.expr_to_symbol[key]
            sym = self._generate_symbol()
            self.symbol_to_expr[sym] = expr
            self.expr_to_symbol[key] = sym
            return sym

    def get_expr(self, symbol):
        """Return the expression stored for ``symbol``, or ``None``."""
        return self.symbol_to_expr.get(symbol, None)
# --- Expression Node (Recursive Tree with Custom Operators) ---
class ExpressionNode:
    """Operator node whose children are nested nodes or plain values."""

    def __init__(self, op, children):
        self.op = op
        self.children = children

    def serialize(self):
        """Return the nested ``{"op", "children"}`` dictionary form."""
        converted = []
        for child in self.children:
            if isinstance(child, ExpressionNode):
                converted.append(child.serialize())
            else:
                converted.append(child)
        return {"op": self.op, "children": converted}

    def hash(self):
        """Return the SHA-256 hex digest of the key-sorted JSON form."""
        canonical = json.dumps(self.serialize(), sort_keys=True)
        return hashlib.sha256(canonical.encode()).hexdigest()

    def __repr__(self):
        rendered = ' '.join(str(child) for child in self.children)
        return f"({self.op} {rendered})"
# --- Expression Builder (Expanded Grammar) ---
class ExpressionBuilder:
    """Builds an expression tree over a token sequence, preserving order."""

    def __init__(self):
        self.operators = ['SEQ', 'SUM', 'MUL', 'POW', 'DIV', 'XOR']  # Expandable

    def build_expr(self, tokens):
        """Combine ``tokens`` pairwise, level by level, into a single tree.

        Neighbouring items are paired left to right; the operator of the
        ``i``-th pair on a level is ``operators[i % len(operators)]``. An
        unpaired last item moves up to the next level unchanged. This keeps
        every token and its position: the earlier version dropped the last
        token of odd-length input and reversed the order when joining nodes.

        Returns:
            The single token itself for one-token input, otherwise the root
            ``ExpressionNode``.

        Raises:
            ValueError: ``tokens`` is empty.
        """
        if not tokens:
            raise ValueError("Cannot build an expression from no tokens")
        level = list(tokens)
        op_count = len(self.operators)
        while len(level) > 1:
            merged = []
            for start in range(0, len(level), 2):
                pair = level[start:start + 2]
                if len(pair) == 2:
                    op = self.operators[(start // 2) % op_count]
                    merged.append(ExpressionNode(op, pair))
                else:
                    merged.append(pair[0])
            level = merged
        return level[0]
# --- Codec Interface ---
class RecursiveSymbolicCodec:
    """Encodes strings to single symbols and back via a symbol dictionary."""

    def __init__(self):
        self.symbols = SymbolDictionary()
        self.builder = ExpressionBuilder()

    def encode(self, string):
        """Return the symbol registered for the expression tree of ``string``.

        Strings shorter than two characters are wrapped in a ``SEQ`` node
        directly. The builder hands back a bare token for single-character
        input, and a bare string cannot be registered because it has no
        ``hash()`` method.
        """
        tokens = list(string)
        if len(tokens) < 2:
            expr_tree = ExpressionNode('SEQ', tokens)
        else:
            expr_tree = self.builder.build_expr(tokens)
            if not isinstance(expr_tree, ExpressionNode):
                expr_tree = ExpressionNode('SEQ', [expr_tree])
        return self.symbols.get_symbol(expr_tree)

    def decode(self, symbol):
        """Return the text encoded by ``symbol``.

        Raises:
            ValueError: ``symbol`` is not known to this codec.
        """
        expr = self.symbols.get_expr(symbol)
        if not expr:
            raise ValueError("Unknown symbol")
        return self.eval_expr(expr)

    def eval_expr(self, expr):
        """Concatenate the leaf strings of ``expr`` from left to right."""
        if isinstance(expr, str):
            return expr
        evaluated = [self.eval_expr(c) for c in expr.children]
        return ''.join(evaluated)
# --- Example Usage ---
if __name__ == '__main__':
    # Round-trip a sample string through the codec.
    s = "HELLO WORLD"
    codec = RecursiveSymbolicCodec()
    symbol = codec.encode(s)
    print(f"Encoded Symbol: {symbol}")
    recovered = codec.decode(symbol)
    print(f"Recovered: {recovered}")
Parser Module
# parser.py
import re
from typing import List, Union
# Node classes for our expression tree
class ExprNode:
    """Interior node of the parse tree: an operator name applied to argument nodes."""

    def __init__(self, operator: str, args: List['ExprNode']):
        self.operator = operator
        self.args = args

    def __repr__(self):
        rendered_args = ', '.join(str(arg) for arg in self.args)
        return f"{self.operator}({rendered_args})"
class LeafNode:
    """Leaf of the parse tree holding a single literal value."""

    def __init__(self, value: Union[int, str]):
        self.value = value

    def __repr__(self):
        # format() with an empty spec is what an f-string placeholder uses.
        return format(self.value)
# Tokenizer for parsing input expression strings
class Tokenizer:
    """Split an expression string into identifier, integer and ``( ) ,`` tokens.

    Characters that match none of those token classes are skipped.
    """

    def __init__(self, text: str):
        pattern = r'[A-Za-z_][A-Za-z0-9_]*|\d+|[(),]'
        self.tokens = re.findall(pattern, text)
        self.position = 0

    def next(self):
        """Consume and return the current token, or ``None`` when exhausted."""
        if self.position >= len(self.tokens):
            return None
        current = self.tokens[self.position]
        self.position += 1
        return current

    def peek(self):
        """Return the current token without consuming it (``None`` when exhausted)."""
        return self.tokens[self.position] if self.position < len(self.tokens) else None

    def expect(self, value):
        """Consume one token and raise ``SyntaxError`` unless it equals ``value``."""
        tok = self.next()
        if tok == value:
            return
        raise SyntaxError(f"Expected '{value}', got '{tok}'")
# Recursive descent parser
class ExpressionParser:
    """Recursive-descent parser for ``name(arg, ...)`` expressions with integer leaves.

    Grammar::

        expr      := INTEGER | NAME '(' expr_list ')'
        expr_list := expr (',' expr)*
    """

    # The annotation is a string so the class can be defined independently of
    # where ``Tokenizer`` lives.
    def __init__(self, tokenizer: "Tokenizer"):
        self.tokenizer = tokenizer

    def parse_expr(self):
        """Parse one expression and return a ``LeafNode`` or ``ExprNode``.

        Raises:
            SyntaxError: on premature end of input, or when punctuation
                appears where an operator name or integer is required.
        """
        token = self.tokenizer.peek()
        if token is None:
            # Previously this fell into re.match(..., None) and raised TypeError.
            raise SyntaxError("Unexpected end of input")
        if re.fullmatch(r'\d+', token):  # Leaf integer
            return LeafNode(int(self.tokenizer.next()))
        if token in ('(', ')', ','):
            raise SyntaxError(f"Expected operator name or integer, got '{token}'")
        name = self.tokenizer.next()
        self.tokenizer.expect('(')
        args = self.parse_expr_list()
        self.tokenizer.expect(')')
        return ExprNode(name, args)

    def parse_expr_list(self):
        """Parse one or more comma-separated expressions into a list."""
        args = [self.parse_expr()]
        while self.tokenizer.peek() == ',':
            self.tokenizer.next()
            args.append(self.parse_expr())
        return args
# Example usage (unit test style)
if __name__ == '__main__':
    # Smoke test: parse a nested call expression and print the resulting tree.
    parser = ExpressionParser(Tokenizer("add(mul(2,3), pow(2,3))"))
    print("Parsed Expression Tree:", parser.parse_expr())
Recursive Encoder #3
# recursive_encoder.py
from parser_module import ExpressionParser
from symbol_dict import SymbolDictionary
from expression_tree import ExpressionNode
import hashlib
class RecursiveEncoder:
    """Encode strings to dictionary symbols and decode them back.

    NOTE(review): ``ExpressionParser`` is constructed without arguments and
    ``ExpressionNode.from_string`` / ``optimize`` / ``to_expression`` come from
    modules not shown here - confirm their signatures against those modules.
    """

    def __init__(self, symbol_dict=None):
        self.parser = ExpressionParser()
        # Any falsy argument (not only None) is replaced by a fresh dictionary.
        self.symbol_dict = symbol_dict if symbol_dict else SymbolDictionary()

    def encode_string(self, input_string):
        """Return the symbol assigned to the optimized expression of ``input_string``."""
        # string -> expression tree -> optimized tree -> expression text
        tree = ExpressionNode.from_string(input_string)
        expression_str = tree.optimize().to_expression()
        # Reuse an existing symbol for this expression text or assign a new one.
        return self.symbol_dict.get_or_assign_symbol(expression_str)

    def decode_symbol(self, symbol):
        """Look up ``symbol``, parse its expression text and return the evaluated result.

        Raises:
            ValueError: if the dictionary returns nothing (or an empty value).
        """
        expression_str = self.symbol_dict.get_expression(symbol)
        if expression_str:
            return self.parser.parse(expression_str).evaluate()
        raise ValueError("Unknown symbol")
# Example usage
if __name__ == '__main__':
    # Round-trip demo: encode a sample string, then decode the resulting symbol.
    encoder = RecursiveEncoder()
    symbol = encoder.encode_string("HELLO WORLD")
    print("Encoded Symbol:", symbol)
    print("Decoded String:", encoder.decode_symbol(symbol))
Grammar Engine Compression
import hashlib
import itertools
from typing import Any, List, Dict, Tuple, Union
# -- Expression Node Definition -- #
class ExpressionNode:
    """Operator applied to operands, which are nested nodes or plain ``str``/``int`` values."""

    def __init__(self, operator: str, operands: List[Union['ExpressionNode', str, int]]):
        self.operator = operator
        self.operands = operands

    def serialize(self) -> str:
        """Return the canonical ``operator(arg1,arg2,...)`` text of this subtree.

        Subclasses with their own textual form override this method, so the
        base class needs no special case for them.
        """
        inner = ','.join(
            operand.serialize() if isinstance(operand, ExpressionNode) else str(operand)
            for operand in self.operands
        )
        return f"{self.operator}({inner})"

    def fingerprint(self) -> str:
        """Return the SHA-256 hex digest of the serialized form."""
        norm = self.serialize()
        return hashlib.sha256(norm.encode()).hexdigest()

    def __repr__(self):
        return self.serialize()
# -- Symbol Node (For Leaf Compression) -- #
class SymbolNode(ExpressionNode):
    """Leaf node standing in for a subtree registered under ``symbol``."""

    def __init__(self, symbol: str):
        # Fixed operator tag and no operands; the symbol carries the content.
        super().__init__('symbol', [])
        self.symbol = symbol

    def serialize(self) -> str:
        """A symbol serializes to its own text."""
        return self.symbol
# -- Symbol Dictionary with Subtree Reuse -- #
class SymbolDictionary:
    """Two-way map between expression fingerprints and generated symbols."""

    def __init__(self):
        self.next_id = 0
        self.expr_to_symbol: Dict[str, str] = {}  # fingerprint -> symbol
        self.symbol_to_expr: Dict[str, ExpressionNode] = {}  # symbol -> node

    def _gen_symbol(self) -> str:
        """Return a fresh symbol and advance the id counter."""
        # Extendable: map to unicode, base4096, or multichar tokens
        issued = self.next_id
        self.next_id = issued + 1
        return f"𝕊{issued}"

    def get_or_create_symbol(self, node: ExpressionNode) -> SymbolNode:
        """Return a ``SymbolNode`` for ``node``, registering it on first sight."""
        fid = node.fingerprint()
        if fid not in self.expr_to_symbol:
            sym = self._gen_symbol()
            self.expr_to_symbol[fid] = sym
            self.symbol_to_expr[sym] = node
        return SymbolNode(self.expr_to_symbol[fid])

    def resolve(self, symbol: str) -> ExpressionNode:
        """Return the node registered under ``symbol`` (``KeyError`` if unknown)."""
        return self.symbol_to_expr[symbol]
# -- Grammar Engine with Operator Expansion -- #
class GrammarEngine:
    """Encode text as a left-nested chain of ``+`` nodes compressed into symbols."""

    def __init__(self):
        self.symbols = SymbolDictionary()

    def build_expression(self, text: str) -> ExpressionNode:
        """Return the expression for ``text``.

        Each character becomes a ``char`` node; the nodes are combined left to
        right, and every combination is replaced by its dictionary symbol.
        Empty text yields a ``+`` node without operands, which decodes to ``""``.
        """
        if not text:
            return ExpressionNode('+', [])
        # Naive tokenizer: real impl would be recursive and grammar-aware
        token_nodes = [ExpressionNode('char', [ord(c)]) for c in text]
        # Left fold; equivalent to repeatedly merging the first two list
        # entries, without the O(n) cost of popping from the front.
        combined = token_nodes[0]
        for following in token_nodes[1:]:
            pair = ExpressionNode('+', [combined, following])
            combined = self.symbols.get_or_create_symbol(pair)
        return combined

    def decode_expression(self, node: Union[SymbolNode, ExpressionNode]) -> str:
        """Expand symbols recursively and return the text encoded by ``node``."""
        if isinstance(node, SymbolNode):
            return self.decode_expression(self.symbols.resolve(node.symbol))
        if node.operator == 'char':
            return chr(node.operands[0])
        return ''.join(
            self.decode_expression(op) if isinstance(op, ExpressionNode) else str(op)
            for op in node.operands
        )
# -- Example Usage -- #
if __name__ == "__main__":
    # Demo: build the compressed expression for a word and decode it again.
    engine = GrammarEngine()
    expr = engine.build_expression("HELLO")
    print("Encoded Expression:", expr)
    print("Decoded:", engine.decode_expression(expr))
Recursive Codec Grammar
import hashlib
import json
import threading
from collections import defaultdict, namedtuple
# Define expression node
tree_id_counter = 0
class ExprNode:
    """Expression node with a content hash computed once at construction.

    ``args`` may contain nested ``ExprNode`` objects or plain values.
    """

    def __init__(self, op, args):
        self.op = op
        self.args = args
        self.hash = self.compute_hash()

    def compute_hash(self):
        """Return a SHA-256 hex digest identifying this subtree's structure.

        Every field is fed to the hash with a type tag and a length prefix.
        Plain concatenation would let different trees share a digest, e.g.
        op ``'ab'`` with arg ``'c'`` versus op ``'a'`` with arg ``'bc'``.
        """
        h = hashlib.sha256()

        def feed(tag, text):
            data = text.encode()
            h.update(f"{tag}{len(data)}:".encode())
            h.update(data)

        feed("O", self.op)
        for a in self.args:
            if isinstance(a, ExprNode):
                feed("N", a.hash)  # child subtree, identified by its own hash
            else:
                feed("L", str(a))  # literal value
        return h.hexdigest()

    def __repr__(self):
        return f"({self.op} {' '.join(map(str, self.args))})"
# Symbol dictionary for compression
class SymbolDictionary:
    """Assign ``§n§`` symbols to expressions, keyed by each expression's ``hash``."""

    def __init__(self):
        self.counter = 0
        self.symbol_to_expr = {}
        self.expr_to_symbol = {}

    def get_symbol(self, expr):
        """Return the symbol for ``expr``, allocating the next one if it is new."""
        key = expr.hash
        try:
            return self.expr_to_symbol[key]
        except KeyError:
            symbol = f"§{self.counter}§"
            self.counter += 1
            self.expr_to_symbol[key] = symbol
            self.symbol_to_expr[symbol] = expr
            return symbol

    def decode_symbol(self, symbol):
        """Return the expression stored for ``symbol``, or ``None`` if unknown."""
        return self.symbol_to_expr.get(symbol)
# Grammar and parser
OPERATORS = ['+', '*', '^', 'wrap', 'concat']
# Recursive encoder
class RecursiveEncoder:
    """Encode a string as a balanced ``concat`` tree of ``lit`` leaves and return its symbol."""

    def __init__(self):
        self.symdict = SymbolDictionary()

    def encode_string(self, string):
        """Return the dictionary symbol for ``string``.

        Raises:
            ValueError: if ``string`` is empty.
        """
        tokens = list(string)
        tree = self.build_expr_tree(tokens)
        return self.symdict.get_symbol(tree)

    def build_expr_tree(self, tokens):
        """Recursively halve ``tokens`` into a balanced tree.

        Raises:
            ValueError: if ``tokens`` is empty.  Without this guard the empty
                sequence would be split into two empty halves forever.
        """
        if len(tokens) == 0:
            raise ValueError("Cannot build an expression tree from an empty token sequence")
        if len(tokens) == 1:
            return ExprNode('lit', [tokens[0]])
        # Two tokens split into 1 + 1, so they need no separate branch.
        mid = len(tokens) // 2
        return ExprNode('concat', [self.build_expr_tree(tokens[:mid]), self.build_expr_tree(tokens[mid:])])
# Decoder
class RecursiveDecoder:
    """Turn symbols back into strings using a shared symbol dictionary."""

    def __init__(self, symdict):
        self.symdict = symdict

    def decode_symbol(self, symbol):
        """Return the string encoded by ``symbol``.

        Raises:
            ValueError: if the dictionary has no expression for ``symbol``
                (previously this surfaced as an ``AttributeError`` on ``None``).
        """
        expr = self.symdict.decode_symbol(symbol)
        if expr is None:
            raise ValueError(f"Unknown symbol: {symbol!r}")
        return self.evaluate(expr)

    def evaluate(self, expr):
        """Evaluate a ``lit`` / ``concat`` tree to the string it represents.

        Raises:
            ValueError: for any other operator.
        """
        if expr.op == 'lit':
            return expr.args[0]
        elif expr.op == 'concat':
            return self.evaluate(expr.args[0]) + self.evaluate(expr.args[1])
        else:
            raise ValueError(f"Unknown op: {expr.op}")
# Example usage
# NOTE: these statements are not wrapped in an ``if __name__ == "__main__"``
# guard, so they execute (and print) whenever this code is run or imported.
# The decoder shares the encoder's dictionary so it can resolve the symbols
# the encoder hands out.
encoder = RecursiveEncoder()
decoder = RecursiveDecoder(encoder.symdict)
# Round trip: string -> symbol -> string.
encoded = encoder.encode_string("HELLO")
print("Encoded Symbol:", encoded)
decoded = decoder.decode_symbol(encoded)
print("Decoded String:", decoded)
Recursive Encoder #4
import hashlib
import json
from typing import Any, Tuple, Union, Dict
# --- Operator Generator: maps expression signatures to symbols ---
class OperatorGenerator:
    """Hand out one single-character symbol per distinct expression signature."""

    def __init__(self):
        self.next_symbol_code = 0xE000  # Start in Private Use Area (can go multi-char later)
        self.operators: Dict[str, str] = {}  # signature -> symbol
        self.symbol_to_expr: Dict[str, str] = {}  # reverse lookup for decoding

    def get_operator(self, signature: str) -> str:
        """Return the symbol for ``signature``, creating it on first use."""
        try:
            return self.operators[signature]
        except KeyError:
            pass
        symbol = self._generate_new_symbol()
        self.operators[signature] = symbol
        self.symbol_to_expr[symbol] = signature
        return symbol

    def _generate_new_symbol(self) -> str:
        """Return the character at the current code point and advance to the next."""
        code = self.next_symbol_code
        symbol = chr(code)
        self.next_symbol_code = code + 1
        return symbol
# --- Expression signature (hashable form of expression) ---
def expr_signature(expr: Any) -> str:
    """Return the SHA-256 hex digest of ``expr`` rendered as key-sorted JSON."""
    canonical = json.dumps(expr, sort_keys=True)
    digest = hashlib.sha256()
    digest.update(canonical.encode())
    return digest.hexdigest()
# --- Encoder ---
def encode_expression(expr: Any, op_gen: OperatorGenerator) -> str:
    """Return ``expr`` unchanged if it is a string, else the symbol for its signature."""
    if not isinstance(expr, str):
        return op_gen.get_operator(expr_signature(expr))
    return expr
# --- Tokenization & Expression building (simple placeholder logic) ---
def tokenize_string(s: str) -> list:
    """Split ``s`` into a list of its individual characters."""
    return [ch for ch in s]
def build_expression(tokens: list) -> Any:
    """Fold ``tokens`` into a left-nested ``("concat", left, token)`` tuple tree.

    A single token is returned as-is.  An empty list yields ``""``, the
    string holding no tokens; the original ``len(tokens) <= 1`` check let the
    empty list through and then failed on ``tokens[0]``.
    """
    if not tokens:
        return ""
    tree = tokens[0]
    for token in tokens[1:]:
        tree = ("concat", tree, token)
    return tree
# --- Single-layer encoding chunk ---
def encode_chunk(input_string: str, op_gen: OperatorGenerator) -> Tuple[str, Any]:
    """Encode one string and return ``(encoded_symbol, expression_tree)``."""
    expr_tree = build_expression(tokenize_string(input_string))
    return encode_expression(expr_tree, op_gen), expr_tree
# --- Recursive Encoding ---
def recursive_encode(input_string: str, max_depth=5, current_depth=0, op_gen=None) -> str:
    """Encode ``input_string``, then re-encode the result until a stop condition holds.

    Recursion stops at the level where ``current_depth`` equals ``max_depth``
    or where the string being encoded has at most four characters.  A fresh
    ``OperatorGenerator`` is created when ``op_gen`` is ``None``.
    """
    op_gen = OperatorGenerator() if op_gen is None else op_gen
    is_last_level = current_depth == max_depth or len(input_string) <= 4
    encoded, _tree = encode_chunk(input_string, op_gen)
    if is_last_level:
        return encoded
    return recursive_encode(encoded, max_depth, current_depth + 1, op_gen)
# --- Decoder ---
def decode_symbol(symbol: str, op_gen: OperatorGenerator) -> str:
    """Return a placeholder description of ``symbol``; ``"?"`` when it is unknown.

    This does not reconstruct the original text: only the expression's
    signature is stored, and its first six characters are shown.
    """
    sig = op_gen.symbol_to_expr.get(symbol)
    if sig:
        # In a real system, you would parse and evaluate the signature to recover the original string
        return f"[EXPR:{sig[:6]}...]"  # placeholder
    return "?"
# --- Demo ---
if __name__ == "__main__":
    # Demo: recursively encode a sample string and show the placeholder decoding.
    op_gen = OperatorGenerator()
    encoded = recursive_encode("HELLO WORLD", max_depth=5, op_gen=op_gen)
    print(f"Encoded: {encoded}")
    print(f"Decoded (symbolic): {decode_symbol(encoded, op_gen)}")
Recursive Encoder #5
import hashlib
import itertools
import json
from collections import defaultdict
from functools import lru_cache
# === Symbol Dictionary (Persistent Mapping) ===
# Expression signature (SHA-256 hex string) -> symbol string.
symbol_table = {}
# Symbol string -> expression; filled by encode_expr() and rebuilt by
# load_symbol_table().
reverse_symbol_table = {}
# Source of the integer ids that get_next_symbol() turns into symbols.
# NOTE: it always starts at 0 and is not advanced when a saved table is loaded.
symbol_counter = itertools.count(start=0)
# Extended alphabet base for symbol generation: get_next_symbol() uses these
# characters as digits, so the alphabet's length is the numeric base.
SYMBOL_BASE_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()-_=+[]{};:,.<>?/|\\'
# === Safe Persistent Store ===
def save_symbol_table(path="symbol_table.json"):
    """Write the signature -> symbol table to ``path`` as JSON."""
    with open(path, mode="w") as handle:
        json.dump(symbol_table, handle)
def load_symbol_table(path="symbol_table.json"):
    """Replace the in-memory tables with the JSON stored at ``path``.

    A missing file is ignored and the current tables are left untouched.

    NOTE: after loading, ``reverse_symbol_table`` maps symbols to signature
    strings (the keys of the saved table), not to expression objects.
    """
    global symbol_table, reverse_symbol_table
    try:
        handle = open(path)
    except FileNotFoundError:
        return
    with handle:
        symbol_table = json.load(handle)
        reverse_symbol_table = dict((sym, sig) for sig, sym in symbol_table.items())
# === Symbol Generator ===
def get_next_symbol():
    """Return the next unused symbol, written in base ``len(SYMBOL_BASE_ALPHABET)``.

    The counter restarts at 0 in every process, while symbols loaded from a
    saved table are already present in ``reverse_symbol_table``.  Candidates
    that are already registered are therefore skipped so that two different
    expressions are never given the same symbol.
    """
    base = len(SYMBOL_BASE_ALPHABET)
    while True:
        n = next(symbol_counter)
        digits = []
        while True:
            n, remainder = divmod(n, base)
            digits.append(SYMBOL_BASE_ALPHABET[remainder])
            if n == 0:
                break
        # Digits were produced least-significant first.
        candidate = ''.join(reversed(digits))
        if candidate not in reverse_symbol_table:
            return candidate
# === Recursive Expression Tree Builder ===
class ExprNode:
    """Expression tree node: an operator with nested nodes or raw values as arguments."""

    def __init__(self, op, args):
        self.op = op
        self.args = args

    def __str__(self):
        joined = ', '.join(str(arg) for arg in self.args)
        return f"{self.op}({joined})"

    def serialize(self):
        """Return ``(op, (arg, ...))`` with nested nodes serialized recursively."""
        parts = []
        for arg in self.args:
            parts.append(arg.serialize() if isinstance(arg, ExprNode) else arg)
        return (self.op, tuple(parts))

    def signature(self):
        """Return the SHA-256 hex digest of the serialized form's ``str()``."""
        text = str(self.serialize())
        return hashlib.sha256(text.encode()).hexdigest()
# === Tokenizer ===
def tokenize(text):
    """Split ``text`` into a list of single characters."""
    return [ch for ch in text]
# === Expression Generator ===
def build_expr(tokens):
    """Build a balanced ``concat`` tree over ``lit`` leaves; ``None`` for no tokens."""
    if not tokens:
        return None
    if len(tokens) > 1:
        # Try compression heuristic: binary split
        mid = len(tokens) // 2
        halves = [build_expr(tokens[:mid]), build_expr(tokens[mid:])]
        return ExprNode('concat', halves)
    return ExprNode('lit', [tokens[0]])
# === Encoder ===
def encode(text):
    """Return the symbol for ``text``.

    Empty text has no expression tree (``build_expr`` returns ``None``), so
    it is mapped to the empty symbol ``""`` instead of failing inside
    ``encode_expr``.  ``decode("")`` returns ``""`` as long as no symbol
    ``""`` is registered, so the empty string round-trips.
    """
    expr = build_expr(tokenize(text))
    if expr is None:
        return ""
    return encode_expr(expr)
@lru_cache(maxsize=None)
def encode_expr(expr):
    """Return the symbol for ``expr``, registering a new one if its signature is unseen.

    NOTE: ``ExprNode`` defines no ``__eq__``/``__hash__``, so the cache is
    keyed by object identity; equal trees built separately are matched through
    ``symbol_table`` instead.
    """
    sig = expr.signature()
    try:
        return symbol_table[sig]
    except KeyError:
        symbol = get_next_symbol()
    symbol_table[sig] = symbol
    reverse_symbol_table[symbol] = expr
    return symbol
# === Decoder ===
def decode(symbol):
    """Return the text for ``symbol``, or ``""`` when nothing is registered for it."""
    expr = reverse_symbol_table.get(symbol)
    return evaluate_expr(expr) if expr else ""
def evaluate_expr(expr):
    """Evaluate a ``lit``/``concat`` tree to its string; any other operator gives ``''``."""
    op = expr.op
    if op == 'concat':
        return ''.join(map(evaluate_expr, expr.args))
    if op == 'lit':
        return expr.args[0]
    return ''
# === Save/Load on init ===
load_symbol_table()
# === Example usage ===
if __name__ == "__main__":
    # Demo: encode, decode, then persist the signature -> symbol table.
    sym = encode("HELLO WORLD")
    print(f"Encoded: {sym}")
    print(f"Decoded: {decode(sym)}")
    save_symbol_table()
Recursive Encoder #6
import hashlib
import threading
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
import itertools
import json
# Symbolic Dictionary Manager with Persistence
class SymbolDictionary:
    """Bidirectional map between JSON-serializable expressions and symbols.

    Symbol allocation in ``get_symbol`` is guarded by a lock.
    """

    # The BMP Private Use Area spans U+E000..U+F8FF (6400 code points).
    _PUA_START = 0xE000
    _PUA_SIZE = 0xF8FF - 0xE000 + 1

    def __init__(self):
        self.expr_to_symbol = {}  # key-sorted JSON text of expression -> symbol
        self.symbol_to_expr = {}  # symbol -> expression
        self.next_symbol_index = 0
        self.lock = threading.Lock()

    def get_symbol(self, expr):
        """Return the symbol for ``expr``, allocating the next free one if needed."""
        key = json.dumps(expr, sort_keys=True)
        with self.lock:
            if key not in self.expr_to_symbol:
                symbol = self._generate_symbol(self.next_symbol_index)
                self.expr_to_symbol[key] = symbol
                self.symbol_to_expr[symbol] = expr
                self.next_symbol_index += 1
            return self.expr_to_symbol[key]

    def get_expression(self, symbol):
        """Return the expression stored for ``symbol``, or ``None`` if unknown."""
        return self.symbol_to_expr.get(symbol)

    def _generate_symbol(self, index):
        """Map ``index`` to a symbol.

        Indices that fit in the Private Use Area become a single PUA
        character; larger ones use a multi-character ``<SYMn>`` token.  The
        bound is the size of the area, not its start code point, so symbols
        never spill into code points assigned to real characters.
        """
        if index < self._PUA_SIZE:
            return chr(self._PUA_START + index)
        return f'<SYM{index}>'

    def save(self, filename):
        """Write both mappings to ``filename`` as JSON."""
        with open(filename, 'w') as f:
            json.dump({'expr_to_symbol': self.expr_to_symbol, 'symbol_to_expr': self.symbol_to_expr}, f)

    def load(self, filename):
        """Replace both mappings with the contents of ``filename``.

        The allocation counter is restored as well: one symbol is issued per
        index, so the next free index equals the number of stored symbols.
        Without this, symbols created after a load would repeat existing ones.
        """
        with open(filename, 'r') as f:
            data = json.load(f)
        self.expr_to_symbol = data['expr_to_symbol']
        self.symbol_to_expr = data['symbol_to_expr']
        self.next_symbol_index = len(self.symbol_to_expr)
# Recursive Expression Encoding
class ExpressionNode:
    """Tree node: either a ``'val'`` leaf carrying ``value`` or an operator over ``children``."""

    def __init__(self, op, children=None, value=None):
        self.op = op
        self.children = children or []
        self.value = value

    def to_dict(self):
        """Return the plain-``dict`` form: ``{'val': v}`` or ``{'op': ..., 'args': [...]}``."""
        if self.op != 'val':
            return {'op': self.op, 'args': [child.to_dict() for child in self.children]}
        return {'val': self.value}

    def __repr__(self):
        if not self.children:
            return str(self.value)
        return f"{self.op}({', '.join(map(str, self.children))})"
# Tokenizer with frequency compression prep
def tokenize_string(s):
    """Split ``s`` into single-character tokens."""
    # Can extend to n-grams or entropy-reducing segmentations
    return [ch for ch in s]
def build_expression(tokens):
    """Build a balanced ``add`` tree whose leaves hold the tokens' code points.

    Raises:
        ValueError: if ``tokens`` is empty.  An empty sequence would otherwise
            be split into two empty halves indefinitely.
    """
    if len(tokens) == 0:
        raise ValueError("build_expression() requires at least one token")
    # Naive balanced nesting + operator alternation
    if len(tokens) == 1:
        return ExpressionNode('val', value=ord(tokens[0]))
    mid = len(tokens) // 2
    left = build_expression(tokens[:mid])
    right = build_expression(tokens[mid:])
    return ExpressionNode('add', [left, right])  # Change operator heuristically
def compress_expression(expr, subtree_map):
    """Rebuild ``expr`` so that identical operator subtrees share one node object.

    ``subtree_map`` maps the key-sorted JSON of a subtree to the node reused
    for it and is updated in place.  ``'val'`` leaves are returned as-is and
    never stored.
    """
    key = json.dumps(expr.to_dict(), sort_keys=True)
    if key not in subtree_map:
        if expr.op == 'val':
            return expr
        shared_children = [compress_expression(child, subtree_map) for child in expr.children]
        subtree_map[key] = ExpressionNode(expr.op, shared_children)
    return subtree_map[key]
def encode_expression(expr, symdict):
    """Return the symbol that ``symdict`` assigns to the dict form of ``expr``."""
    lookup = symdict.get_symbol
    return lookup(expr.to_dict())
def decode_symbol(symbol, symdict):
    """Return the string encoded by ``symbol``.

    Raises:
        ValueError: if ``symdict`` has no expression for ``symbol``
            (previously a ``TypeError`` from testing membership in ``None``).
    """
    expr_dict = symdict.get_expression(symbol)
    if expr_dict is None:
        raise ValueError(f"Unknown symbol: {symbol!r}")
    return eval_expression_dict(expr_dict)
def eval_expression_dict(d):
    """Evaluate a dict-form expression to the string it encodes.

    ``{'val': n}`` yields ``chr(n)``; an operator node yields the
    concatenation of its evaluated arguments.
    """
    if 'val' in d:
        return chr(d['val'])
    # Looked up even though every operator currently concatenates, so a node
    # without an 'op' key still raises KeyError.  Extend for other ops
    op = d['op']
    pieces = [eval_expression_dict(arg) for arg in d['args']]
    return ''.join(pieces)
# Parallel encoding
symdict = SymbolDictionary()
def full_encode_pipeline(s):
    """Tokenize ``s``, build and compress its tree, and return its symbol from ``symdict``."""
    tree = build_expression(tokenize_string(s))
    # A fresh subtree map per call: sharing is limited to this one string.
    compressed = compress_expression(tree, {})
    return encode_expression(compressed, symdict)
def full_decode_pipeline(symbol):
    """Decode ``symbol`` against the module-level ``symdict``."""
    decoded = decode_symbol(symbol, symdict)
    return decoded
def parallel_encode(s, chunk_size=512):
    """Encode ``s`` in ``chunk_size``-character pieces on a thread pool.

    Returns the list of symbols in chunk order.
    """
    starts = range(0, len(s), chunk_size)
    chunks = [s[start:start + chunk_size] for start in starts]
    with ThreadPoolExecutor() as pool:
        return list(pool.map(full_encode_pipeline, chunks))
# Example
if __name__ == '__main__':
    # Demo: encode in parallel chunks, then decode and re-join the pieces.
    encoded = parallel_encode("HELLO WORLD, THIS IS A TEST OF THE RECURSIVE ENCODER!")
    print("Encoded Symbols:", encoded)
    print("Decoded String:", ''.join(full_decode_pipeline(sym) for sym in encoded))
Recursive Encoder Decoder
import hashlib
import math
import threading
import json
import uuid
from collections import defaultdict
# === Symbol Dictionary ===
symbol_dict = {}
reverse_symbol_dict = {}
lock = threading.Lock()
# Thread-safe symbol generator: a symbol is a truncated SHA-256 digest of the
# expression text wrapped in ⟦ ⟧ (no UUIDs are involved).
def generate_symbol(expression):
    """Return the symbol for ``expression``, registering it on first use.

    The symbol normally carries the first 8 hex digits of the digest.  If
    that prefix is already taken by a different expression, the prefix is
    lengthened until it is free, so an existing reverse mapping is not
    overwritten by a truncated-hash collision.
    """
    key = str(expression)
    with lock:
        existing = symbol_dict.get(key)
        if existing is not None:
            return existing
        digest = hashlib.sha256(key.encode()).hexdigest()
        for width in range(8, len(digest) + 1):
            symbol = "⟦" + digest[:width] + "⟧"
            if symbol not in reverse_symbol_dict:
                break
        symbol_dict[key] = symbol
        reverse_symbol_dict[symbol] = expression
        return symbol
# === Expression Builder ===
def build_expression(tokens):
    """Combine ``tokens`` into a nested ``sum(...)`` / ``mul(...)`` expression string.

    One token is returned unchanged, two become ``sum(a,b)``, and longer
    lists are halved recursively and joined with ``mul(left,right)``.

    Raises:
        ValueError: if ``tokens`` is empty.  An empty list would otherwise be
            halved into two empty lists indefinitely.
    """
    if not tokens:
        raise ValueError("build_expression() requires at least one token")
    if len(tokens) == 1:
        return tokens[0]
    if len(tokens) == 2:
        return f"sum({tokens[0]},{tokens[1]})"
    mid = len(tokens) // 2
    left = build_expression(tokens[:mid])
    right = build_expression(tokens[mid:])
    return f"mul({left},{right})"
# === Tokenizer ===
def tokenize_string(s):
    """Return one ``char^count`` token per distinct character of ``s``, sorted by character.

    Character order and positions are not kept, so the tokens do not
    determine the original string.
    """
    counts = {}
    for ch in s:
        counts[ch] = counts.get(ch, 0) + 1
    return [f"{ch}^{n}" for ch, n in sorted(counts.items())]
# === Encoder ===
def encode_string(s):
    """Return the symbol for the frequency expression derived from ``s``."""
    expr = build_expression(tokenize_string(s))
    return generate_symbol(expr)
# === Safe Expression Evaluator ===
def evaluate_expression(expr):
    """Evaluate an expression string to an integer score.

    ``sum(a,b)`` adds and ``mul(a,b)`` multiplies the two evaluated
    arguments, ``c^n`` is ``ord(c) * n``, and a bare character is ``ord(c)``.

    The exponent is split off at the *last* ``^`` so a token whose character
    is itself ``^`` (for example ``"^^3"``) is still parsed.

    NOTE(review): tokens whose character is ``,``, ``(`` or ``)`` are
    presumably still mis-parsed inside ``sum``/``mul``, because arguments are
    separated on top-level commas - confirm against ``split_args``.
    """
    if expr.startswith("sum("):
        left, right = map(evaluate_expression, split_args(expr[4:-1]))
        return left + right
    if expr.startswith("mul("):
        left, right = map(evaluate_expression, split_args(expr[4:-1]))
        return left * right
    if "^" in expr:
        char, _, exponent = expr.rpartition("^")
        return ord(char) * int(exponent)
    return ord(expr)
def split_args(inner):
    """Split ``inner`` on commas that are not nested inside parentheses.

    Always returns at least one item; empty input gives ``[""]``.
    """
    pieces, buffer, depth = [], [], 0
    for ch in inner:
        if depth == 0 and ch == ",":
            # Top-level separator: close the current argument.
            pieces.append("".join(buffer))
            buffer = []
            continue
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
        buffer.append(ch)
    pieces.append("".join(buffer))
    return pieces
# === Decoder ===
def decode_symbol(symbol):
    """Return the numeric evaluation of the expression registered for ``symbol``.

    Raises:
        ValueError: if no (non-empty) expression is registered for it.
    """
    expr = reverse_symbol_dict.get(symbol)
    if expr:
        return evaluate_expression(expr)
    raise ValueError("Unknown symbol")
# === Save/Load Dictionary ===
def save_dictionary(filename="symbol_dict.json"):
    """Write the expression -> symbol mapping to ``filename`` as JSON."""
    with open(filename, mode="w") as handle:
        json.dump(symbol_dict, handle)
def load_dictionary(filename="symbol_dict.json"):
    """Replace both module-level mappings with the JSON stored in ``filename``.

    The reverse (symbol -> expression) mapping is rebuilt by inverting the
    loaded one.  A missing file raises the ``open`` error unchanged.
    """
    global symbol_dict, reverse_symbol_dict
    with open(filename, mode="r") as handle:
        symbol_dict = json.load(handle)
    reverse_symbol_dict = dict((sym, expr) for expr, sym in symbol_dict.items())
# === Example Usage ===
if __name__ == "__main__":
    # Demo: the "decoded" value is a numeric score, not the original text.
    sample = "HELLO WORLD"
    symbol = encode_string(sample)
    print(f"Symbol for '{sample}': {symbol}")
    print(f"Decoded (numerical score): {decode_symbol(symbol)}")
Megc Encoder Deploy
# main.py - Entrypoint for MEGC Recursive Encoder/Decoder
from encoder import encode_string
from decoder import decode_symbol
from dictionary import SymbolDictionary
import argparse
# Dictionary instance shared by both commands.
# NOTE(review): presumably persisted to 'symbol_map.json' by the dictionary
# module - confirm there.
dictionary = SymbolDictionary('symbol_map.json')

# Command-line interface with two sub-commands; the chosen one is stored in
# ``args.command`` (``None`` when no sub-command is given).
parser = argparse.ArgumentParser(description="MEGC: Mapped Entropic Golden Codec")
subparsers = parser.add_subparsers(dest='command')

# encode <input>
encode_parser = subparsers.add_parser('encode')
encode_parser.add_argument('input', type=str, help='Input string to encode')

# decode <symbol>
decode_parser = subparsers.add_parser('decode')
decode_parser.add_argument('symbol', type=str, help='Symbol to decode')

args = parser.parse_args()

if args.command == 'decode':
    result = decode_symbol(args.symbol, dictionary)
    print(f"Decoded String: {result}")
elif args.command == 'encode':
    symbol = encode_string(args.input, dictionary)
    print(f"Encoded Symbol: {symbol}")
else:
    # No sub-command was given: show usage.
    parser.print_help()
Meg Codec Main #1
# meg_codec_main.py
# Entry point for MEGC - Mapped Entropic Golden Codec
from encoder import RecursiveEncoder
from decoder import RecursiveDecoder
from dictionary import SymbolDictionary
# Initialize persistent symbol dictionary
symbol_dict = SymbolDictionary(persist_file="megcodec_symbols.json")
# Instantiate encoder and decoder with the same dictionary
encoder = RecursiveEncoder(symbol_dict)
decoder = RecursiveDecoder(symbol_dict)
def encode_string(input_string):
    """Encode ``input_string`` with the module-level encoder, logging input and result."""
    print("Encoding:", input_string)
    result = encoder.encode(input_string)
    print("Encoded:", result)
    return result
def decode_string(encoded_symbol):
    """Decode ``encoded_symbol`` with the module-level decoder, logging input and result."""
    print("Decoding symbol:", encoded_symbol)
    result = decoder.decode(encoded_symbol)
    print("Decoded string:", result)
    return result
if __name__ == "__main__":
    # Round-trip check on a sample sentence.
    original = "HELLO WORLD, THIS IS A TEST OF THE RECURSIVE ENCODER!"
    decoded = decode_string(encode_string(original))
    print("✅ Roundtrip successful" if decoded == original else "❌ Mismatch!")
Meg Codec Main #2
import os
import pickle
from encoder import RecursiveEncoder
from decoder import RecursiveDecoder
from dictionary import SymbolDictionary
# Configuration
DICTIONARY_PATH = "symbol_dict.pkl"
# Initialize persistent symbol dictionary
dictionary = SymbolDictionary(DICTIONARY_PATH)
# Create encoder and decoder with shared symbol dictionary
encoder = RecursiveEncoder(dictionary)
decoder = RecursiveDecoder(dictionary)
def encode_text_to_symbol(text: str) -> str:
    """Run ``text`` through the encoder's build, compress and symbol stages; return the symbol."""
    tree = encoder.build_expression_tree(text)
    compressed = encoder.compress_tree(tree)
    return encoder.tree_to_symbol(compressed)
def decode_symbol_to_text(symbol: str) -> str:
    """Resolve ``symbol`` to its expression via the decoder and return the evaluated text."""
    expression = decoder.symbol_to_expression(symbol)
    return decoder.evaluate_expression(expression)
def encode_file(input_path: str, output_path: str):
    """Encode the UTF-8 text in ``input_path`` and write the symbol to ``output_path``.

    The dictionary is saved afterwards so that new symbol mappings persist.
    """
    with open(input_path, mode='r', encoding='utf-8') as source:
        text = source.read()
    # Encode before opening the output, so a failure leaves it untouched.
    symbol = encode_text_to_symbol(text)
    with open(output_path, mode='w', encoding='utf-8') as target:
        target.write(symbol)
    dictionary.save()  # Save updated symbol mappings
def decode_file(input_path: str, output_path: str):
    """Decode the symbol stored in ``input_path`` and write the text to ``output_path``.

    The file content is used as the symbol exactly as read (no stripping).
    The dictionary is saved afterwards, as in ``encode_file``.
    """
    with open(input_path, mode='r', encoding='utf-8') as source:
        symbol = source.read()
    text = decode_symbol_to_text(symbol)
    with open(output_path, mode='w', encoding='utf-8') as target:
        target.write(text)
    dictionary.save()
if __name__ == '__main__':
    import argparse

    # CLI: <mode> <input> <output>, where mode is "encode" or "decode".
    parser = argparse.ArgumentParser(description="MEGC Symbolic Codec")
    parser.add_argument('mode', choices=['encode', 'decode'])
    parser.add_argument('input', help="Input file path")
    parser.add_argument('output', help="Output file path")
    args = parser.parse_args()

    handlers = {'encode': encode_file, 'decode': decode_file}
    handler = handlers.get(args.mode)
    if handler is not None:
        handler(args.input, args.output)