Dreaming Up Something Enormously Small Part 1

Josef_Founder · June 7, 2025, 1:59am

To attend:

Base4096 Deterministic

import math
from functools import lru_cache

# Define operator functions and symbols
# Binary operator table: symbol -> function of two integers.
# '/', '%' and '^' return None for operands they cannot handle (zero divisor,
# negative exponent) instead of raising. build_expr iterates this dict, so the
# insertion order below is kept as-is.
binary_ops = {
    '+': lambda x, y: x + y,
    '-': lambda x, y: x - y,
    '*': lambda x, y: x * y,
    '/': lambda x, y: None if y == 0 else x // y,
    '%': lambda x, y: None if y == 0 else x % y,
    '^': lambda x, y: x ** y if y >= 0 else None,
    '&': lambda x, y: x & y,
    '|': lambda x, y: x | y,
}

# Unary operator table: symbol -> function of one integer.
unary_ops = {
    '~': lambda value: ~value,
}

# AST node types
class Expr:
    """Abstract AST node; subclasses override both methods."""

    def eval(self):
        """Return the value this node evaluates to."""
        raise NotImplementedError

    def serialize(self):
        """Return the textual encoding of this node."""
        raise NotImplementedError

class Literal(Expr):
    """Leaf node that wraps a constant value."""

    def __init__(self, value):
        self.value = value

    def eval(self):
        """Return the wrapped value unchanged."""
        return self.value

    def serialize(self):
        """Encode as the tag ``L`` followed by the value."""
        return "L{}".format(self.value)

class UnaryOp(Expr):
    """Node applying one operator from ``unary_ops`` to a sub-expression."""

    def __init__(self, op, expr):
        self.op = op
        self.expr = expr

    def eval(self):
        """Look up the operator, evaluate the operand, and apply."""
        func = unary_ops[self.op]
        return func(self.expr.eval())

    def serialize(self):
        """Encode as ``U``, the operator symbol, then the operand."""
        return "U{}{}".format(self.op, self.expr.serialize())

class BinaryOp(Expr):
    """Node applying one operator from ``binary_ops`` to two sub-expressions."""

    def __init__(self, op, left, right):
        self.op = op
        self.left = left
        self.right = right

    def eval(self):
        """Look up the operator, evaluate left then right, and apply."""
        func = binary_ops[self.op]
        lhs = self.left.eval()
        rhs = self.right.eval()
        return func(lhs, rhs)

    def serialize(self):
        """Encode as ``O``, the operator symbol, then left and right."""
        return "O{}{}{}".format(
            self.op, self.left.serialize(), self.right.serialize()
        )

# Optimized expression builder with depth control
@lru_cache(maxsize=None)
def build_expr(n, max_depth=4):
    """Search for the shortest serialized expression that evaluates to ``n``.

    Args:
        n: Target integer. Negative targets are rejected.
        max_depth: Remaining nesting budget; each level of recursion uses one.

    Returns:
        An ``Expr`` whose ``eval()`` equals ``n``, or ``None`` when ``n`` is
        negative or ``max_depth`` is 0. ``Literal(n)`` is the starting
        candidate, so a compound expression is returned only when its
        serialization is strictly shorter than the literal's.
    """
    if max_depth == 0 or n < 0:
        return None

    best = Literal(n)
    best_len = len(best.serialize())

    # Errors an operator can raise on awkward operands. Catching only these
    # (instead of a bare ``except``) lets KeyboardInterrupt and genuine
    # programming errors surface.
    recoverable = (ArithmeticError, TypeError, ValueError)

    # Try unary operators: wrap the expression for op(n) in op again.
    for op, func in unary_ops.items():
        try:
            inv = func(n)
        except recoverable:
            continue
        subexpr = build_expr(inv, max_depth - 1)
        if subexpr is None:
            continue
        cand = UnaryOp(op, subexpr)
        try:
            if cand.eval() != n:
                continue
        except recoverable:
            continue
        cand_len = len(cand.serialize())
        if cand_len < best_len:
            best, best_len = cand, cand_len

    # Try binary operations on every split n = a + (n - a).
    for op, func in binary_ops.items():
        for a in range(1, n + 1):
            try:
                # Sub-expressions always evaluate to a and n - a, so the
                # candidate's value is func(a, n - a); skip before building
                # anything when that is not n (this also covers None).
                if func(a, n - a) != n:
                    continue
            except recoverable:
                continue
            left = build_expr(a, max_depth - 1)
            right = build_expr(n - a, max_depth - 1)
            if left is None or right is None:
                continue
            cand = BinaryOp(op, left, right)
            cand_len = len(cand.serialize())
            if cand_len < best_len:
                best, best_len = cand, cand_len

    return best

# Encoding and decoding interface
def encode(n):
    """Return the serialized expression for ``n``, or ``None`` if none exists."""
    expr = build_expr(n)
    if not expr:
        return None
    return expr.serialize()

def decode(s):
    """Parse a serialized expression and return the value it evaluates to.

    The format is prefix notation without separators:
        ``L<int>``              literal (an optional leading ``-`` is accepted)
        ``U<op><expr>``         unary operator
        ``O<op><expr><expr>``   binary operator

    Anything after the first complete expression is ignored.

    Raises:
        ValueError: on an unknown node tag or a literal with no digits.
        IndexError: if ``s`` ends before the expression is complete.
    """
    def parse(index):
        token = s[index]
        if token == 'L':
            start = index + 1
            i = start
            # Literal.serialize() writes negative values as "L-5". A '-' can
            # only follow 'L' as a sign, because the next node would have to
            # start with L, U or O.
            if i < len(s) and s[i] == '-':
                i += 1
            while i < len(s) and s[i].isdigit():
                i += 1
            return Literal(int(s[start:i])), i
        elif token == 'U':
            op = s[index + 1]
            subexpr, i = parse(index + 2)
            return UnaryOp(op, subexpr), i
        elif token == 'O':
            op = s[index + 1]
            left, i = parse(index + 2)
            right, j = parse(i)
            return BinaryOp(op, left, right), j
        else:
            raise ValueError("Invalid token")

    expr, _ = parse(0)
    return expr.eval()

# Example
if __name__ == '__main__':
    # Round-trip the integers 1..19 and print each stage.
    for number in range(1, 20):
        text = encode(number)
        value = decode(text)
        print(f"{number} => {text} => {value}")

Expression Alphabet

import hashlib
import operator
from typing import Union, Callable, List, Optional, Dict, Tuple
import itertools
import math

# --- Define atomic value types ---
Expr = Union[int, str, 'Node']

class Node:
    """Expression-tree node: an operator name applied to a list of operands."""

    def __init__(self, op_name: str, operands: List[Expr], depth: int = 1):
        self.op_name, self.operands, self.depth = op_name, operands, depth

    def __repr__(self):
        rendered = [repr(operand) for operand in self.operands]
        return "{}({})".format(self.op_name, ', '.join(rendered))

# --- Meta-grammar for operator evolution ---
# Primitive two-argument operators, keyed by name. The key order is the order
# in which derived-operator names are generated.
BASE_OPERATORS = dict(
    add=operator.add,
    mul=operator.mul,
    xor=operator.xor,
    pow=pow,
)

# Filled by register_operator(): name -> callable, and depth -> list of names.
DERIVED_OPERATORS: Dict[str, Callable] = dict()
DEPTH_OPERATORS: Dict[int, List[str]] = dict()

# Register hybrid or depth-aware operators
def register_operator(name: str, func: Callable, depth: int):
    """Record ``func`` under ``name`` at ``depth`` unless the name is taken.

    A name already present in BASE_OPERATORS or DERIVED_OPERATORS is left
    untouched and nothing is recorded.
    """
    if name in BASE_OPERATORS or name in DERIVED_OPERATORS:
        return
    DERIVED_OPERATORS[name] = func
    DEPTH_OPERATORS.setdefault(depth, []).append(name)

# Generate derived operators up to a given depth
def generate_derived_operators(max_depth=3):
    """Register every chain of 2..max_depth base operators as a new operator.

    A chain named ``add_mul`` computes ``mul(add(a, b), b)``: each base
    operator in turn is applied to the running result and the same ``b``.
    """
    def chain(op_names):
        # op_names is bound per call, so each combo keeps its own sequence.
        def combo(a, b):
            result = a
            for op_name in op_names:
                result = BASE_OPERATORS[op_name](result, b)
            return result
        return combo

    for depth in range(2, max_depth + 1):
        for op_names in itertools.product(BASE_OPERATORS, repeat=depth):
            register_operator('_'.join(op_names), chain(op_names), depth)

# --- Evaluate any expression node or literal ---
def evaluate(expr: Expr) -> int:
    """Reduce an expression to an integer.

    Integers evaluate to themselves, strings to their SHA-256 digest modulo
    4096, and nodes to their operator applied to the evaluated operands.

    Raises:
        ValueError: if a node names an operator in neither operator table.
        TypeError: if ``expr`` is not an int, str or Node.
    """
    if isinstance(expr, int):
        return expr
    if isinstance(expr, str):
        digest = hashlib.sha256(expr.encode()).hexdigest()
        return int(digest, 16) % 4096
    if not isinstance(expr, Node):
        raise TypeError(f"Unsupported expr: {expr}")

    fn = BASE_OPERATORS.get(expr.op_name) or DERIVED_OPERATORS.get(expr.op_name)
    if fn is None:
        raise ValueError(f"Unknown operator: {expr.op_name}")
    values = [evaluate(operand) for operand in expr.operands]
    return fn(*values)

# --- Grammar-guided expression generator ---
def generate_expressions(depth: int = 2) -> List[Node]:
    """Build every binary node up to ``depth`` levels over the literals 0-9.

    Each level pairs all expressions known so far (literals plus nodes from
    earlier levels) under every operator available at that level, so the
    result grows with the square of the pool at every level.
    """
    pool: List[Expr] = list(range(10))
    produced = []
    for level in range(1, depth + 1):
        level_ops = [*BASE_OPERATORS.keys(), *DEPTH_OPERATORS.get(level, [])]
        level_nodes = [
            Node(op, [left, right], depth=level)
            for op in level_ops
            for left, right in itertools.product(pool, repeat=2)
        ]
        produced.extend(level_nodes)
        # Nodes join the pool only after the level is complete.
        pool.extend(level_nodes)
    return produced

# --- Match expression to a target string hash ---
def _iter_candidate_nodes(max_depth: int):
    """Yield candidate nodes lazily, in the order generate_expressions() builds them."""
    pool = list(range(10))
    for level in range(1, max_depth + 1):
        ops = list(BASE_OPERATORS.keys()) + DEPTH_OPERATORS.get(level, [])
        # Nodes are only needed again as operands of the next level, so the
        # last level does not have to be kept in memory.
        remember = level < max_depth
        fresh = []
        for op in ops:
            for a, b in itertools.product(pool, repeat=2):
                node = Node(op, [a, b], depth=level)
                if remember:
                    fresh.append(node)
                yield node
        pool.extend(fresh)


def find_expression_for_string(target: str, max_depth: int = 3) -> Optional[Node]:
    """Return the first candidate node whose value equals the target's hash.

    The target hash is the SHA-256 digest of ``target`` modulo 4096.
    Candidates are produced one at a time and the search stops at the first
    match. Building the whole candidate list first, as before, is not feasible
    at the default depth because each level squares the size of the pool.

    Args:
        target: String to match.
        max_depth: Deepest expression level to try.

    Returns:
        The matching node, or ``None`` if no candidate up to ``max_depth``
        evaluates to the target hash.
    """
    target_hash = int(hashlib.sha256(target.encode()).hexdigest(), 16) % 4096
    for expr in _iter_candidate_nodes(max_depth):
        try:
            if evaluate(expr) == target_hash:
                return expr
        except Exception:
            # Best effort: a candidate that cannot be evaluated is not a match.
            continue
    return None

# --- Canonical encoder/decoder ---
def encode_string_as_expression(s: str, max_depth: int = 3) -> Node:
    """Return an expression matching the hash of ``s``.

    Raises:
        ValueError: if no expression is found within ``max_depth``.
    """
    found = find_expression_for_string(s, max_depth)
    if found is not None:
        return found
    raise ValueError("Could not encode string within given depth")

def decode_expression(expr: Node) -> str:
    """Return a label carrying the expression's value, not the original text."""
    return "string_with_hash_{}".format(evaluate(expr))

# --- Full document encoding ---
def encode_document(text: str, max_depth: int = 3) -> List[Node]:
    """Encode each line of ``text``; lines that cannot be encoded become ``None``."""
    def try_encode(line):
        try:
            return encode_string_as_expression(line, max_depth)
        except ValueError:
            return None

    return [try_encode(line) for line in text.splitlines()]

def decode_document(expressions: List[Optional[Node]]) -> List[str]:
    """Decode each expression, writing ``"UNENCODABLE"`` for missing entries."""
    decoded = []
    for expr in expressions:
        decoded.append(decode_expression(expr) if expr else "UNENCODABLE")
    return decoded

# --- Recursive assembler for higher-level encoding ---
def assemble_expressions(expressions: List[Node], max_depth: int = 4) -> Optional[Node]:
    """Find one expression matching the hash of all reprs joined with ``|``."""
    combined_str = '|'.join(map(repr, expressions))
    return find_expression_for_string(combined_str, max_depth=max_depth)

# --- Expand system ---
# Import-time side effect: register the derived operators for depths 2-4.
generate_derived_operators(max_depth=4)

# --- Example usage ---
if __name__ == "__main__":
    # Level 1: encode each line of the sample text separately.
    doc = """
    In the beginning God created the heavens and the earth.
    And the earth was without form, and void; and darkness was upon the face of the deep.
    And the Spirit of God moved upon the face of the waters.
    """
    expressions = encode_document(doc, max_depth=3)
    print("-- Level 1 Expressions --")
    for expr in expressions:
        print("Expr:", expr)
        if expr:
            print("Decoded:", decode_expression(expr))
        else:
            print("Decoded:", "Failed")

    # Level 2: fold the successfully encoded lines into one expression.
    encodable = [e for e in expressions if e]
    top_expr = assemble_expressions(encodable, max_depth=4)
    print("\n-- Level 2 Unified Expression --")
    print("Expr:", top_expr)
    if top_expr:
        print("Decoded:", decode_expression(top_expr))
    else:
        print("Decoded:", "Failed")

Deterministic Encoder

import math
import hashlib
from functools import lru_cache

# --- Stage 1: Alphabet Growth Estimator ---

# Define operator set with their arity (number of arguments)
# Operator name -> arity. An arity of -1 marks a variadic operator.
# The key order is used by build_expression_from_index to pick an operator.
OPERATORS = {
    '+': 2, '*': 2, '^': 2, '-': 2, '/': 2, '%': 2,  # binary arithmetic
    'neg': 1, 'sqrt': 1, 'log': 1, 'exp': 1,         # unary
    'max': 2, 'min': 2,                               # binary selection
    'sum': -1, 'prod': -1,                            # variable arity
}

# Estimate number of expressions possible at given depth
@lru_cache(maxsize=None)
def count_expressions(depth):
    """Estimate how many expressions exist at exactly ``depth`` levels.

    Depth 0 counts as one leaf. A fixed-arity operator contributes
    ``count(depth - 1) ** arity``; a variadic operator is counted as the
    number of multisets of 2 or 3 sub-expressions.
    """
    if depth == 0:
        return 1  # base literal (a symbol or constant)
    below = count_expressions(depth - 1)
    variadic = sum(math.comb(below + k - 1, k) for k in range(2, 4))
    return sum(
        variadic if arity == -1 else below ** arity
        for arity in OPERATORS.values()
    )


# --- Stage 2: Approximate-Index Expression Grammar ---

def string_to_fingerprint(s):
    """Return the SHA-512 digest of ``s`` read as a big-endian integer."""
    hex_digest = hashlib.sha512(s.encode()).hexdigest()
    return int(hex_digest, 16)


def build_expression_from_index(index, max_depth):
    """Deterministically turn a numeric index into an expression string.

    Placeholder for a full grammar walk. The operator is chosen by
    ``index`` modulo the operator count; each child index is taken from a
    higher "digit" of ``index`` in that base and reduced modulo 997. At depth
    0 the result is the single digit ``index % 10``. Variadic operators are
    treated as binary.
    """
    if max_depth == 0:
        return str(index % 10)  # just a leaf literal

    names = list(OPERATORS.keys())
    radix = len(names)
    op = names[index % radix]
    arity = OPERATORS[op]
    if arity == -1:
        arity = 2  # simplify
    child_indices = [
        (index // (radix ** (position + 1))) % 997 for position in range(arity)
    ]
    rendered = []
    for child_index in child_indices:
        rendered.append(build_expression_from_index(child_index, max_depth - 1))
    return "({} {})".format(op, ' '.join(rendered))


# --- Stage 3: Resolver ---

def encode_string(s, max_depth=5):
    """Encode ``s`` as the expression selected by its SHA-512 fingerprint."""
    return build_expression_from_index(string_to_fingerprint(s), max_depth)


# --- Stage 4: Inverse Hash Decoder (Approximate) ---

def decode_expression_to_fingerprint(expr):
    """Placeholder for the inverse of encode_string(); not implemented.

    Raises:
        NotImplementedError: always, because the SHA-512 step cannot be
            reversed.
    """
    message = "Perfect reverse decoding is not feasible via SHA512. Consider storing a lookup table or reversible grammar."
    raise NotImplementedError(message)


if __name__ == '__main__':
    # Show the depth-5 size estimate, then encode a sample sentence.
    test_str = "HELLO WORLD, THIS IS A TEST OF THE RECURSIVE ENCODER!"
    print(f"Total expressible characters (depth 5): {count_expressions(5)}")
    print("Encoded expression:")
    print(encode_string(test_str))

Recursive Codec Roadmap

# Recursive Codec Full Roadmap Implementation
# Step-by-step expansion of deterministic nested operator grammar codec.
# Author: Josef Kulovany - ZCHG.org

from typing import Union, List
import operator

# -------------------
# 1. Core Expression Data Structure
# -------------------

class ExprChar:
    """Expression node: an operator symbol with integer or nested operands."""

    def __init__(self, op: str, operands: List[Union[int, 'ExprChar']]):
        self.op = op
        self.operands = operands

    def __repr__(self):
        inner = ', '.join(str(operand) for operand in self.operands)
        return "{}({})".format(self.op, inner)

# -------------------
# 2. Grammar and Operator Set Expansion
# -------------------

# Operator symbol -> two-argument function. Division by zero yields 1 and
# modulo by zero yields 0 instead of raising.
basic_ops = {
    '+': operator.add,
    '*': operator.mul,
    '^': operator.pow,
    '-': operator.sub,
    '/': lambda num, den: 1 if den == 0 else num // den,
    '%': lambda num, den: 0 if den == 0 else num % den,
}

# Future: Extend with logical, bitwise, and hybrid operators

# -------------------
# 3. Evaluation
# -------------------

def evaluate(expr: Union[int, ExprChar]) -> int:
    """Evaluate an expression by folding its operator left to right.

    An integer evaluates to itself. A node with a single operand evaluates
    to that operand, whatever its operator.
    """
    if isinstance(expr, int):
        return expr
    apply = basic_ops[expr.op]
    values = [evaluate(operand) for operand in expr.operands]
    total = values[0]
    for value in values[1:]:
        total = apply(total, value)
    return total

# -------------------
# 4. Encoding Integer to Expression Tree (Depth-Limited)
# -------------------

def encode_int(n: int, depth: int = 2) -> ExprChar:
    """Encode ``n`` as a depth-limited expression tree.

    For each operator in turn, the first ``a`` in ``1..n-1`` is taken for
    which ``infer_b`` proposes a ``b`` with ``a op b == n``; both operands are
    then encoded one level shallower. When ``depth`` is 0, ``n < 2``, or no
    split is found, the result is the single-operand leaf ``+[n]``.

    NOTE: ``*`` is tried first and ``a = 1`` always satisfies ``1 * n == n``,
    so for ``n >= 2`` the search currently stops at that trivial split.
    """
    if depth == 0 or n < 2:
        return ExprChar('+', [n])
    for op in ('*', '+', '^', '-', '/', '%'):
        for a in range(1, n):
            b = infer_b(n, a, op)
            if b is None:
                continue
            try:
                matches = evaluate(ExprChar(op, [a, b])) == n
            except (ArithmeticError, TypeError, ValueError):
                # Only errors an operator can raise on odd operands mean
                # "not a match"; anything else should surface.
                continue
            if matches:
                return ExprChar(op, [encode_int(a, depth - 1), encode_int(b, depth - 1)])
    return ExprChar('+', [n])

def infer_b(result, a, op):
    """Propose the operand ``b`` for which ``a op b`` might equal ``result``.

    The proposal is a guess that the caller must verify. Returns ``None``
    when the operator is unknown or no candidate can be computed (zero
    operand, logarithm outside its domain, ...).
    """
    # Local import: this script's import block does not bring in ``math``.
    # Without it the '^' branch raised NameError, which the old bare
    # ``except`` silently turned into None.
    import math

    try:
        if op == '+':
            return result - a
        if op == '*':
            return result // a if a != 0 else None
        if op == '^':
            return int(round(math.log(result, a)))
        if op == '-':
            return a - result
        if op == '/':
            return a // result if result != 0 else None
        if op == '%':
            return result if a != 0 else None
    except (ArithmeticError, ValueError, TypeError):
        return None
    return None

# -------------------
# 5. Serialization / Deserialization (Text Format)
# -------------------

def serialize(expr):
    """Render an expression as ``op[child,child,...]``; integers as decimal text."""
    if isinstance(expr, int):
        return str(expr)
    parts = [serialize(operand) for operand in expr.operands]
    return "{}[{}]".format(expr.op, ','.join(parts))

def deserialize(s: str):
    """Parse text produced by ``serialize`` back into an int or ExprChar.

    Integers may carry a leading ``-``; ``serialize`` writes negative
    operands that way, and they were previously mistaken for a ``-`` node.
    Anything else is read as ``<op>[<child>,<child>,...]`` with a
    single-character operator.
    """
    digits = s[1:] if s.startswith('-') else s
    if digits.isdigit():
        return int(s)
    op = s[0]
    inner = s[s.find('[') + 1:-1]
    parts = split_expr(inner)
    return ExprChar(op, [deserialize(p) for p in parts])

def split_expr(s):
    """Split ``s`` on the commas that are not enclosed in square brackets.

    A trailing empty piece is dropped; empty pieces elsewhere are kept.
    """
    pieces = []
    buffer = []
    nesting = 0
    for ch in s:
        if ch == '[':
            nesting += 1
        if ch == ']':
            nesting -= 1
        if ch == ',' and nesting == 0:
            pieces.append(''.join(buffer))
            buffer = []
            continue
        buffer.append(ch)
    if buffer:
        pieces.append(''.join(buffer))
    return pieces

# -------------------
# 6. String Encoding via Character Frequency -> Integer Mapping
# -------------------

def string_to_freq_map(s):
    """Pack the character counts of ``s`` into one integer.

    Characters are visited in sorted order; each step multiplies the running
    total by 256 and adds ``ord(char) * count``.
    """
    from collections import Counter
    counts = Counter(s)
    packed = 0
    for ch, times in sorted(counts.items()):
        packed = packed * 256 + ord(ch) * times
    return packed

# -------------------
# 7. Full Encode/Decode Pipeline
# -------------------

def encode_string(s: str, depth=3) -> str:
    """Map ``s`` to its frequency integer and serialize that integer's tree."""
    return serialize(encode_int(string_to_freq_map(s), depth))

def decode_string(encoded: str) -> int:
    """Parse ``encoded`` and return the integer it evaluates to."""
    return evaluate(deserialize(encoded))

# -------------------
# 8. Next Up: Nested Compression Layers
# -------------------
# - Encode each encoded expression as its own character via symbolic alphabet
# - Layer deeper (ExprChar of ExprChar of ExprChar...)
# - Full fingerprint and reconstruction path

# Let me know when to begin layering or visual tooling!

Recursive Codec Pipeline

Recursive Codec Pipeline with Layered Encoding

Overview

This pipeline deterministically encodes any string into a unique symbolic representation using a grammar of nested operators. It supports recursion by building higher-order expressions from encoded symbols of smaller segments.

1. Core Components

Alphabet and Operators

# 62 alphanumeric characters; int_to_expr treats any n below this count as a leaf.
basic_alphabet = (
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
)
# Operator symbols, selected by index in int_to_expr.
operators = list("+*^~@#&|%!:?")

Expression Node (Recursive Expression Grammar)

class ExprNode:
    """Operator node whose children are integers or further ExprNodes."""

    def __init__(self, op, children):
        self.op = op
        self.children = children

    def __str__(self):
        rendered = ', '.join(map(str, self.children))
        return "({} {})".format(self.op, rendered)

    def evaluate(self):
        """Evaluate the node; only '+', '*' and '^' are implemented.

        Any other operator evaluates to 0. '^' needs exactly two children.
        """
        vals = []
        for child in self.children:
            vals.append(child.evaluate() if isinstance(child, ExprNode) else child)
        op = self.op
        if op == '+':
            return sum(vals)
        if op == '*':
            product = 1
            for v in vals:
                product *= v
            return product
        if op == '^':
            base, exp = vals
            return base ** exp
        # Extend with additional logic per operator as needed
        return 0

2. Encoding Functions

String to Integer via Positional Weighting (a weighted character sum, not a cryptographic hash)

def string_to_integer(s):
    """Return the sum of ``ord(char) * len(s) ** position`` over ``s``."""
    base = len(s)
    return sum(ord(ch) * (base ** position) for position, ch in enumerate(s))

Integer to Expression Tree

def int_to_expr(n, depth=0, max_depth=3):
    """Expand ``n`` into a binary tree whose children encode n // 2 and n // 3.

    Recursion stops, returning ``n`` itself, at ``max_depth`` or as soon as
    ``n`` is smaller than the alphabet size.
    """
    if depth >= max_depth or n < len(basic_alphabet):
        return n
    symbol = operators[n % len(operators)]
    children = [int_to_expr(n // divisor, depth + 1, max_depth) for divisor in (2, 3)]
    return ExprNode(symbol, children)

Expression to Symbol Mapping (Placeholder)

def expr_to_symbol(expr):
    """Return a 24-bit hexadecimal tag for ``expr``, written as ``<HEX>``.

    The tag is the first three bytes of the SHA-256 digest of ``str(expr)``.
    The built-in ``hash()`` used before is salted per process for strings, so
    the same expression produced a different symbol on every run.
    """
    # Local import: this snippet has no import block of its own.
    import hashlib

    digest = hashlib.sha256(str(expr).encode("utf-8")).digest()
    return f"<{int.from_bytes(digest[:3], 'big'):X}>"  # Simulate unique character ID

Full Encode

def encode_string(s):
    """Encode ``s``: string -> integer -> expression tree -> symbol."""
    return expr_to_symbol(int_to_expr(string_to_integer(s)))

3. Recursive Layering

Layer Text into Blocks

def layer_text(text, block_size=16):
    """Encode ``text`` block by block, then encode the joined block symbols."""
    encoded_blocks = []
    for start in range(0, len(text), block_size):
        encoded_blocks.append(encode_string(text[start:start + block_size]))
    return encode_string(''.join(encoded_blocks))

4. Decoding (Prototype Stage)

Symbol to Expression (Inverse Mapping)

def symbol_to_expr(symbol):
    """Placeholder inverse of expr_to_symbol: ignores ``symbol``, returns (+ 1, 2)."""
    # The full version is meant to use a deterministic grammar here.
    placeholder_children = [1, 2]
    return ExprNode('+', placeholder_children)

Evaluate Expression to Recover Integer

def expr_to_int(expr):
    """Evaluate an ExprNode; return any other value unchanged."""
    if isinstance(expr, ExprNode):
        return expr.evaluate()
    return expr

Integer to Approximate String (Stub)

def int_to_string(n):
    """Stub: wrap ``n`` in a ``String(...)`` label; no text is reconstructed."""
    # Real reconstruction requires a compression dictionary in the full version.
    return "String({})".format(n)

Full Decode (Stub)

def decode_symbol(symbol):
    """Stub decode: symbol -> expression -> integer -> label string."""
    return int_to_string(expr_to_int(symbol_to_expr(symbol)))

5. Example Usage

# Demo: layer_text() encodes the text in blocks, then encodes the joined block symbols.
text = "HELLO WORLD, THIS IS A TEST OF THE RECURSIVE ENCODER!"
symbol = layer_text(text)
print("Encoded as symbol:", symbol)

Expression Codec

import math
from typing import Union, Dict, Tuple

# Expression Node
# Plain alias instead of the ``type`` statement, which is a syntax error
# before Python 3.12. The quoted names keep the recursive reference lazy.
Expr = Union[int, Tuple[str, 'Expr', 'Expr']]

# Global symbol dictionary for encoding/decoding
# Two-way registry between expression strings and their assigned characters.
expr_to_symbol: Dict[str, str] = {}
symbol_to_expr: Dict[str, str] = {}
symbol_base = 0xE000  # Start of Private Use Area

# Operators in precedence order (lower index = higher precedence)
OPERATORS = [
    ('^', lambda base, exponent: base ** exponent),
    ('*', lambda lhs, rhs: lhs * rhs),
    ('+', lambda lhs, rhs: lhs + rhs),
]

# Encode an expression as a string

def expr_to_str(expr: Expr) -> str:
    """Render an expression fully parenthesised, e.g. ``((2^3)+1)``."""
    if not isinstance(expr, int):
        op, left, right = expr
        return "({}{}{})".format(expr_to_str(left), op, expr_to_str(right))
    return str(expr)

# Recursively evaluate an expression
def eval_expr(expr: Expr) -> int:
    """Evaluate an expression tuple recursively.

    Raises:
        ValueError: if the operator symbol is not listed in OPERATORS.
    """
    if isinstance(expr, int):
        return expr
    op, left, right = expr
    func = next((f for symbol, f in OPERATORS if symbol == op), None)
    if func is None:
        raise ValueError(f"Unknown operator: {op}")
    return func(eval_expr(left), eval_expr(right))

# Generate a unique character for each expression string
def get_symbol(expr: Expr) -> str:
    """Return the character assigned to ``expr``, allocating one on first use.

    New characters are taken sequentially from ``symbol_base``.

    Raises:
        OverflowError: once ``symbol_base`` has passed U+F8FF.
    """
    global symbol_base
    key = expr_to_str(expr)
    cached = expr_to_symbol.get(key)
    if cached is not None:
        return cached
    if symbol_base > 0xF8FF:
        raise OverflowError("PUA exhausted")
    symbol = chr(symbol_base)
    symbol_base += 1
    expr_to_symbol[key] = symbol
    symbol_to_expr[symbol] = key
    return symbol

# Decompose an integer greedily: perfect power first, then smallest factor,
# then (n - 1) + 1. The result is not guaranteed to be the shortest expression.
# This can be improved with heuristics and memoization.
def _integer_root(n: int, k: int) -> int:
    """Return the largest integer r with r ** k <= n, for n >= 1 and k >= 1."""
    lo, hi = 1, 1 << (n.bit_length() // k + 1)
    while lo < hi:
        mid = (lo + hi + 1) // 2
        if mid ** k <= n:
            lo = mid
        else:
            hi = mid - 1
    return lo


def decompose(n: int) -> Expr:
    """Greedily rewrite ``n`` as a nested expression of '^', '*' and '+'.

    Values below 10 are returned unchanged. Otherwise the first rule that
    applies is used: a perfect power with the smallest exponent, then a
    product with the smallest factor, then ``(n - 1) + 1``.

    Roots are computed with integer arithmetic, so large perfect powers are
    not missed through float rounding and very large ``n`` does not overflow.
    Trial division stops at the square root, which finds the same smallest
    factor as scanning up to ``n // 2``.
    """
    if n < 10:
        return n
    # Heuristic: prioritize exponentiation. 2 ** b <= n bounds the exponent.
    for b in range(2, n.bit_length()):
        a = _integer_root(n, b)
        if a ** b == n:
            return ('^', decompose(a), decompose(b))
    for b in range(2, math.isqrt(n) + 1):
        if n % b == 0:
            return ('*', decompose(n // b), decompose(b))
    return ('+', decompose(n - 1), 1)

# Encode a string as a symbol sequence using expression encoding
def encode_string(s: str) -> str:
    """Encode ``s`` as the symbol of its decomposed character-code sum.

    Summing ``ord(c) * count`` over the distinct characters equals summing
    ``ord(c)`` over every character, which is what is done here.
    """
    total = sum(ord(ch) for ch in s)
    return get_symbol(decompose(total))

# Decode a symbol back to its expression and evaluate
def _evaluate_expr_string(text: str, pos: int = 0):
    """Evaluate the fully parenthesised form written by expr_to_str().

    Grammar: ``expr := [-]digits | "(" expr op expr ")"`` with ``op`` one of
    ``^`` (power), ``*`` and ``+``.

    Returns:
        A ``(value, index)`` pair; ``index`` is just past the parsed text.

    Raises:
        ValueError: if the text does not follow the grammar.
    """
    if pos < len(text) and text[pos] == '(':
        left, pos = _evaluate_expr_string(text, pos + 1)
        if pos >= len(text):
            raise ValueError("Truncated expression")
        op = text[pos]
        right, pos = _evaluate_expr_string(text, pos + 1)
        if pos >= len(text) or text[pos] != ')':
            raise ValueError("Missing closing parenthesis")
        if op == '^':
            value = left ** right
        elif op == '*':
            value = left * right
        elif op == '+':
            value = left + right
        else:
            raise ValueError(f"Unknown operator: {op}")
        return value, pos + 1

    end = pos
    if end < len(text) and text[end] == '-':
        end += 1
    while end < len(text) and text[end].isdigit():
        end += 1
    return int(text[pos:end]), end


def decode_symbol(symbol: str) -> int:
    """Look up the expression stored for ``symbol`` and return its value.

    The stored string is parsed by a small dedicated parser. Python's
    ``eval`` was unsuitable: it reads ``^`` as bitwise XOR, so every
    expression containing a power decoded to the wrong number, and it would
    execute whatever text the registry held.

    Raises:
        ValueError: if the symbol is unknown or its expression is malformed.
    """
    expr_str = symbol_to_expr.get(symbol)
    if not expr_str:
        raise ValueError("Unknown symbol")
    value, end = _evaluate_expr_string(expr_str)
    if end != len(expr_str):
        raise ValueError("Unexpected trailing text in expression")
    return value

if __name__ == "__main__":
    # Encode a sample string, then decode the resulting symbol.
    s = "HELLO WORLD"
    encoded = encode_string(s)
    for label, shown in (("Original", s), ("Encoded symbol", encoded)):
        print(f"{label}: {shown}")
    print(f"Decoded (raw eval value): {decode_symbol(encoded)}")

Expression Codec Pipeline

# === expression_codec_pipeline.py ===

import hashlib
from collections import defaultdict

# === Expression Node Class ===
class ExprNode:
    """Tree node: a leaf holding ``value`` or an operator with children."""

    def __init__(self, op=None, children=None, value=None):
        self.op = op  # Operator: '+', '*', '^', 'sum', etc.
        self.children = children if children else []
        self.value = value  # For leaves

    def is_leaf(self):
        """A node without an operator is a leaf."""
        return self.op is None

    def serialize(self):
        """Return the leaf value, or ``(op child child ...)`` for operators."""
        if self.is_leaf():
            return self.value
        parts = ' '.join(child.serialize() for child in self.children)
        return "({} {})".format(self.op, parts)

    def hash(self):
        """Return a SHA-256 hex digest of the value, or of op plus child hashes."""
        if self.is_leaf():
            payload = self.value
        else:
            payload = self.op + ''.join(child.hash() for child in self.children)
        return hashlib.sha256(payload.encode()).hexdigest()

# === Parser ===
def parse_expr(s):
    """Parse ``(op child child ...)`` text into an ExprNode tree.

    Text that does not start with ``(`` becomes a leaf. Children are
    separated by spaces that sit outside any nested parentheses.
    """
    s = s.strip()
    if not s.startswith('('):
        return ExprNode(value=s)
    op, rest = s[1:-1].strip().split(' ', 1)
    children = []
    buffer = []
    nesting = 0
    for ch in rest:
        if ch == '(':
            nesting += 1
        elif ch == ')':
            nesting -= 1
        if ch != ' ' or nesting != 0:
            buffer.append(ch)
        elif buffer:
            # A top-level space ends the current child.
            children.append(parse_expr(''.join(buffer)))
            buffer = []
    if buffer:
        children.append(parse_expr(''.join(buffer)))
    return ExprNode(op=op, children=children)

# === Tree Construction ===
def build_balanced_tree(s, op='+'):
    """Split ``s`` in half recursively into a binary tree joined by ``op``.

    Strings of length 0 or 1 become leaves. The empty string previously
    recursed forever, because its left half is again the empty string.
    """
    if len(s) <= 1:
        return ExprNode(value=s)
    mid = len(s) // 2
    left = build_balanced_tree(s[:mid], op)
    right = build_balanced_tree(s[mid:], op)
    return ExprNode(op=op, children=[left, right])

# === Compression / Subtree Reuse ===
def compress_tree(root):
    """Replace repeated subtrees with ``REF_<hash prefix>`` leaves, in place.

    Nodes are visited parent first, children left to right. The first node
    seen for each hash is kept; later ones become reference leaves.

    Returns:
        ``(root, cache)`` where ``cache`` maps full hashes to the kept nodes.
    """
    seen = {}

    def visit(node):
        digest = node.hash()
        if digest in seen:
            return ExprNode(value=f"REF_{digest[:6]}")
        seen[digest] = node
        if node.is_leaf():
            return node
        node.children = [visit(child) for child in node.children]
        return node

    compressed = visit(root)
    return compressed, seen

# === Operator Precedence Extension ===
def build_tree_with_ops(s, ops=('+', '*', '^')):
    """Split ``s`` in half recursively, choosing each operator by length.

    The operator at a node is ``ops[len(s) % len(ops)]``. Strings of length 0
    or 1 become leaves; the empty string previously recursed forever. The
    default is a tuple so that no mutable object is shared between calls.
    """
    if len(s) <= 1:
        return ExprNode(value=s)
    op = ops[len(s) % len(ops)]  # Rotate ops deterministically
    mid = len(s) // 2
    left = build_tree_with_ops(s[:mid], ops)
    right = build_tree_with_ops(s[mid:], ops)
    return ExprNode(op=op, children=[left, right])

# === Symbol Dictionary ===
class SymbolDictionary:
    """Assigns sequential symbols ``S0``, ``S1``, ... to expressions by hash."""

    def __init__(self):
        self.expr_to_symbol = {}
        self.symbol_to_expr = {}
        self.counter = 0

    def get_symbol(self, expr):
        """Return the symbol for ``expr``, creating one on first sight."""
        digest = expr.hash()
        known = self.expr_to_symbol.get(digest)
        if known is None:
            known = "S{}".format(self.counter)
            self.counter += 1
            self.expr_to_symbol[digest] = known
            self.symbol_to_expr[known] = expr
        return known

    def get_expr(self, symbol):
        """Return the expression stored for ``symbol``, or ``None``."""
        return self.symbol_to_expr.get(symbol)

# === Full Encode / Decode ===
def encode(string, ops=['+', '*', '^']):
    """Build the operator tree for ``string``, compress it, and serialize it."""
    compressed, _cache = compress_tree(build_tree_with_ops(string, ops))
    return compressed.serialize()

def decode(expr_str):
    """Parse a serialized tree and return the concatenated leaf text.

    ``REF_<prefix>`` leaves written by ``compress_tree`` are expanded first.
    Each one points at an earlier subtree whose hash starts with that prefix.
    Children are resolved before their parent is hashed, so the hash matches
    the one taken before compression. Without this step the marker text
    itself ended up in the output whenever the input repeated a character.
    """
    known = {}  # 6-hex-digit hash prefix -> fully expanded subtree

    def resolve(node):
        if node.is_leaf():
            value = node.value
            if value.startswith("REF_") and value[4:] in known:
                return known[value[4:]]
        else:
            node.children = [resolve(child) for child in node.children]
        known.setdefault(node.hash()[:6], node)
        return node

    def flatten(node):
        if node.is_leaf():
            return node.value
        return ''.join(flatten(c) for c in node.children)

    return flatten(resolve(parse_expr(expr_str)))

# === Example Usage ===
if __name__ == "__main__":
    # Encode a sample string and decode it again.
    test_str = "HELLOWORLD"
    encoded = encode(test_str)
    print("Encoded Expression:", encoded)
    print("Decoded String:", decode(encoded))

Recursive Encoder #1

import hashlib
import itertools
import json
from collections import defaultdict

# --- Symbol Dictionary ---
class SymbolDictionary:
    """Maps JSON-serializable expressions to short symbols and back."""

    def __init__(self):
        self.symbol_counter = 0
        self.symbol_to_expr = {}
        self.expr_to_symbol = {}
        self.base_symbols = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!@#$%^&*()")

    def get_symbol(self, expr):
        """Return the symbol for ``expr``, allocating the next one if new."""
        key = self._fingerprint(expr)
        symbol = self.expr_to_symbol.get(key)
        if symbol is None:
            symbol = self._next_symbol()
            self.symbol_to_expr[symbol] = expr
            self.expr_to_symbol[key] = symbol
        return symbol

    def _next_symbol(self):
        """Take the next base symbol, or ``@<n>`` once those run out."""
        index = self.symbol_counter
        self.symbol_counter = index + 1
        if index < len(self.base_symbols):
            return self.base_symbols[index]
        return f"@{index}"

    def _fingerprint(self, expr):
        """SHA-256 hex digest of the key-sorted JSON form of ``expr``."""
        canonical = json.dumps(expr, sort_keys=True)
        return hashlib.sha256(canonical.encode()).hexdigest()

# --- Tokenizer ---
def tokenize(text):
    """Split ``text`` into a list of its individual characters."""
    return [ch for ch in text]

# --- Operator Grammar ---
class Operator:
    """A named operator with a declared arity and an implementing function."""

    def __init__(self, name, arity, func):
        self.name, self.arity, self.func = name, arity, func

    def apply(self, args):
        """Call the function with ``args`` unpacked as positional arguments."""
        func = self.func
        return func(*args)

# Define simple example operators
# Example operators; compress_tokens picks one by index and uses its name.
OPERATORS = [
    Operator("ADD", 2, lambda lhs, rhs: lhs + rhs),
    Operator("MUL", 2, lambda lhs, rhs: lhs * rhs),
    Operator("XOR", 2, lambda lhs, rhs: lhs ^ rhs),
    Operator("SHF", 2, lambda value, shift: value << shift),
    # Exponents of 10 or more are not raised; the result is 1 instead.
    Operator("POW", 2, lambda base, exponent: base ** exponent if exponent < 10 else 1),
]

# --- Expression Compression ---
def compress_tokens(tokens, symdict):
    """Fold a token list into one symbol by pairing neighbours repeatedly.

    Each token first gets a leaf symbol for ``{"val": token}``. In every pass
    adjacent symbols are combined into an operator expression, with the
    operator chosen by the position of the pair. An unpaired symbol at the end
    of a pass is carried forward unchanged. Wrapping it as
    ``{"val": symbol}``, as before, made ``decode_symbol`` return the symbol
    character instead of the text it stands for.

    Args:
        tokens: Non-empty sequence of tokens.
        symdict: Dictionary object providing ``get_symbol(expr)``.

    Returns:
        The symbol that represents the whole sequence.

    Raises:
        IndexError: if ``tokens`` is empty.
    """
    leaves = [symdict.get_symbol({"val": t}) for t in tokens]
    while len(leaves) > 1:
        merged = []
        for i in range(0, len(leaves), 2):
            pair = leaves[i:i + 2]
            if len(pair) == 2:
                op = OPERATORS[i % len(OPERATORS)]
                merged.append(symdict.get_symbol({"op": op.name, "args": pair}))
            else:
                merged.append(pair[0])
        leaves = merged
    return leaves[0]

# --- Decoder ---
def decode_symbol(sym, symdict):
    """Expand ``sym`` back into text using the dictionary's stored expressions.

    An unknown symbol is returned as-is. A ``val`` entry yields its value, an
    ``op`` entry the concatenation of its decoded arguments, and any other
    entry ``"?"``.
    """
    expr = symdict.symbol_to_expr.get(sym)
    if not expr:
        return sym
    if "val" in expr:
        return expr["val"]
    if "op" not in expr:
        return "?"
    return ''.join(decode_symbol(arg, symdict) for arg in expr["args"])

# --- Full Encode/Decode API ---
def encode(text):
    """Compress ``text`` with a fresh dictionary; return ``(symbol, dictionary)``."""
    symdict = SymbolDictionary()
    return compress_tokens(tokenize(text), symdict), symdict

def decode(symbol, symdict):
    """Expand ``symbol`` into text using the dictionary from ``encode``."""
    text = decode_symbol(symbol, symdict)
    return text

# --- Example Usage ---
if __name__ == "__main__":
    # Encode a sample string and decode it with the same dictionary.
    text = "HELLO WORLD"
    encoded, symdict = encode(text)
    print("Encoded symbol:", encoded)
    print("Decoded string:", decode(encoded, symdict))

Recursive Operator Grammar

# Enhanced operator pattern detection
# + Recursive grammar integration scaffolding

from typing import List
from collections import Counter
import re

# Expression Node
class ExprNode:
    """Hashable node: an operator name with positional arguments.

    A node without arguments is a leaf whose ``op`` is its text.
    """

    def __init__(self, op, *args):
        self.op = op
        self.args = args

    def __eq__(self, other):
        if not isinstance(other, ExprNode):
            return False
        return self.op == other.op and self.args == other.args

    def __hash__(self):
        return hash((self.op, self.args))

    def __repr__(self):
        if self.args:
            rendered = ', '.join(repr(arg) for arg in self.args)
            return "{}({})".format(self.op, rendered)
        return "'{}'".format(self.op)

# Detect patterns and choose operators

def detect_patterns(s: str) -> ExprNode:
    """Describe ``s`` as an expression tree built from simple patterns.

    Rules are tried in this order:
      1. Empty or single-character strings are leaves.
      2. One repeated character, e.g. "aaaa" => sum('a', 4).
      3. A repeated unit, e.g. "abcabc" => power(<unit>, 2).
      4. Digit groups joined by '+', e.g. "1+22" => add_chain('1', '22').
      5. Otherwise the string is halved into a concat tree.

    The base case comes first so that the empty string no longer recurses
    forever and single characters are plain leaves. The one-character test
    comes before the general repetition test because a run of one character
    is also a repetition of a unit of length 1, which previously made the
    ``sum`` rule fire only for single characters.
    """
    n = len(s)
    if n <= 1:
        return ExprNode(s)

    # Detect addition of same character: e.g. "aaaa" => sum('a', 4)
    if len(set(s)) == 1:
        return ExprNode('sum', ExprNode(s[0]), ExprNode(str(n)))

    # Detect perfect repetition: e.g. "abcabcabc" => power(<tree for "abc">, 3)
    for unit_len in range(1, n // 2 + 1):
        if n % unit_len == 0:
            repeat_unit = s[:unit_len]
            if repeat_unit * (n // unit_len) == s:
                return ExprNode('power', detect_patterns(repeat_unit), ExprNode(str(n // unit_len)))

    # Detect basic arithmetic numeric strings (optional)
    if re.fullmatch(r"\d+(\+\d+)+", s):
        parts = s.split('+')
        return ExprNode('add_chain', *(ExprNode(p) for p in parts))

    # Default to concat tree
    mid = n // 2
    return ExprNode('concat', detect_patterns(s[:mid]), detect_patterns(s[mid:]))

# Basic grammar scaffolding for recursive operator expansion
class OperatorGrammar:
    """Turns an expression tree back into the string it describes."""

    def __init__(self):
        # Each rule receives the already reconstructed argument strings.
        self.rules = {
            'concat': lambda args: ''.join(args),
            'sum': lambda args: args[0] * int(args[1]),
            'power': lambda args: args[0] * int(args[1]),
            # Rebuild the original text "1+2". Returning the arithmetic sum
            # ("3") would break the round trip back to the input string.
            'add_chain': lambda args: '+'.join(args),
        }

    def evaluate(self, expr: ExprNode) -> str:
        """Reconstruct the string for ``expr``.

        A node without arguments is a leaf and yields its own text.

        Raises:
            KeyError: if an inner node names an operator without a rule.
        """
        if not expr.args:
            return expr.op
        evaluated_args = [self.evaluate(arg) for arg in expr.args]
        return self.rules[expr.op](evaluated_args)

# Example usage
if __name__ == '__main__':
    # Detect patterns in a sample string and check that it round-trips.
    input_str = "abcabcabc"
    expr = detect_patterns(input_str)
    print("Detected expression:", expr)
    reconstructed = OperatorGrammar().evaluate(expr)
    print("Reconstructed:", reconstructed)
    assert reconstructed == input_str

Recursive Encoder #2

import hashlib
import json
import string
from typing import Union, Dict, List

# === Base Operators ===
# Two-argument operators, looked up by name in ExprNode.evaluate.
OPERATORS = {
    'ADD': lambda lhs, rhs: lhs + rhs,
    'MUL': lambda lhs, rhs: lhs * rhs,
    'POW': lambda base, exponent: base ** exponent,
}

# === Derived Operators and Patterns ===
# One-argument operators that work on the decimal text of a number.
DERIVED_OPERATORS = {
    'MAX_DEPTH': lambda n: max(len(str(n)), 1),          # number of characters
    'MIRROR': lambda n: int(str(n)[::-1]),               # digits reversed
    'FOLD': lambda n: sum(int(digit) for digit in str(n)),  # digit sum
}

# === Tokenizer ===
def tokenize(text: str) -> List[str]:
    """Split ``text`` into whitespace-separated chunks."""
    # could be improved with NLP chunking
    chunks = text.split()
    return chunks

# === Expression Node ===
class ExprNode:
    """Operator node whose children are plain values or further ExprNodes."""

    def __init__(self, op, children):
        self.op = op
        self.children = children

    def evaluate(self):
        """Apply the operator to the evaluated children.

        Names in OPERATORS receive all children; names in DERIVED_OPERATORS
        receive only the first. Any other name yields ``None``.
        """
        def value_of(child):
            return child.evaluate() if isinstance(child, ExprNode) else child

        if self.op in OPERATORS:
            return OPERATORS[self.op](*(value_of(c) for c in self.children))
        if self.op in DERIVED_OPERATORS:
            return DERIVED_OPERATORS[self.op](value_of(self.children[0]))
        return None

    def to_dict(self):
        """Return a nested dict with the keys ``op`` and ``children``."""
        children = []
        for child in self.children:
            children.append(child.to_dict() if isinstance(child, ExprNode) else child)
        return {'op': self.op, 'children': children}

    def to_string(self):
        """Return the dict form as JSON with sorted keys."""
        return json.dumps(self.to_dict(), sort_keys=True)

# === Symbol Dictionary ===
class SymbolDictionary:
    """Two-way map between serialized expressions and ``@SYM<n>`` symbols."""

    def __init__(self):
        self.expr_to_symbol: Dict[str, str] = {}
        self.symbol_to_expr: Dict[str, str] = {}
        self.counter = 0

    def _generate_symbol(self):
        """Return the next unused symbol and advance the counter."""
        index = self.counter
        self.counter = index + 1
        return f"@SYM{index}"

    def get_symbol(self, expr: ExprNode) -> str:
        """Return the symbol for ``expr``, registering it on first sight.

        Expressions are identified by ``expr.to_string()``.
        """
        key = expr.to_string()
        known = self.expr_to_symbol.get(key)
        if known is not None:
            return known
        symbol = self._generate_symbol()
        self.expr_to_symbol[key] = symbol
        self.symbol_to_expr[symbol] = key
        return symbol

    def get_expression(self, symbol: str) -> ExprNode:
        """Rebuild the expression stored for ``symbol``.

        Raises:
            ValueError: if ``symbol`` was never registered.
        """
        if symbol not in self.symbol_to_expr:
            raise ValueError(f"Unknown symbol: {symbol}")
        serialized = self.symbol_to_expr[symbol]
        return parse_expression(json.loads(serialized))

# === Parser ===
def parse_expression(data: "Union[dict, int, str]") -> "Union[ExprNode, int, str]":
    """Rebuild an expression tree from its ``to_dict`` form.

    Args:
        data: a ``{'op': ..., 'children': [...]}`` dict, or a leaf value.

    Returns:
        An ``ExprNode`` for dict input. Any non-dict value (an integer
        literal or a symbol string such as ``"@SYM0"``) is a leaf and is
        returned unchanged.
    """
    # Leaves are not only ints: compress_text stores symbol strings as
    # children, and indexing a str with 'op' would raise TypeError.
    if not isinstance(data, dict):
        return data
    children = [parse_expression(child) for child in data['children']]
    return ExprNode(data['op'], children)

# === Expression Encoder ===
def encode_chunk(chunk: str, symbols: SymbolDictionary) -> str:
    """Return the symbol for ``FOLD(MUL(sum of code points, 1))`` of ``chunk``.

    Only the sum of the characters' code points is stored, so different
    chunks with the same sum share one symbol.
    """
    val = sum(map(ord, chunk))
    inner = ExprNode('MUL', [val, 1])
    return symbols.get_symbol(ExprNode('FOLD', [inner]))

# === High-Level Compression ===
def compress_text(text: str, symbols: SymbolDictionary) -> str:
    """Encode each chunk of ``text`` and return the symbol of their ``ADD`` node."""
    chunk_syms = []
    for chunk in tokenize(text):
        chunk_syms.append(encode_chunk(chunk, symbols))
    return symbols.get_symbol(ExprNode('ADD', chunk_syms))

# === Decompression ===
def decompress_symbol(symbol: str, symbols: SymbolDictionary) -> str:
    """Look up ``symbol`` in ``symbols`` and render its expression as text."""
    return reconstruct_string(symbols.get_expression(symbol))

def reconstruct_string(expr: Union[ExprNode, str]) -> str:
    """Render an expression (or the symbol naming one) as a string.

    Symbol strings are resolved through the module-level ``symbols``
    dictionary, not through a parameter. ``ADD`` joins its children with
    spaces; ``FOLD`` emits ``chr(val % 256)`` repeated ``val // 256`` times,
    where ``val`` is the innermost stored number. Anything else is passed
    through ``str``.
    """
    node = symbols.get_expression(expr) if isinstance(expr, str) else expr
    if not isinstance(node, ExprNode):
        return str(node)
    if node.op == 'ADD':
        parts = [reconstruct_string(child) for child in node.children]
        return ' '.join(parts)
    if node.op == 'FOLD':
        inner = node.children[0]
        val = inner.children[0] if isinstance(inner, ExprNode) else inner
        return chr(val % 256) * (val // 256)
    return str(node)

# === Example Run ===
# Module-level dictionary; reconstruct_string() reads this name as a global.
symbols = SymbolDictionary()
input_text = "HELLO WORLD THIS IS A TEST"
final_symbol = compress_text(input_text, symbols)
print("Final Symbol:", final_symbol)
# Decoding goes through the same dictionary that produced the symbol.
print("Decompressed:", decompress_symbol(final_symbol, symbols))

Recursive Encoder Expanded

import hashlib
import json
import threading
from collections import defaultdict

# --- Symbol Dictionary (Persistent, Safe) ---
class SymbolDictionary:
    """Lock-protected two-way map between expressions and one-character symbols.

    Symbols are drawn from Unicode private-use code points: first the BMP
    Private Use Area, then Supplementary Private Use Area-A.
    """

    # Inclusive (first, last) code-point ranges used for symbols, in order.
    _SYMBOL_RANGES = ((0xE000, 0xF8FF), (0xF0000, 0xFFFFD))

    def __init__(self):
        self.lock = threading.Lock()
        self.symbol_to_expr = {}
        self.expr_to_symbol = {}
        self.next_id = 0

    def _generate_symbol(self):
        """Return the next unused private-use character.

        The symbol is computed with ``chr`` because an f-string cannot splice
        digits into a ``\\u`` escape (a truncated escape is a syntax error).

        Raises:
            OverflowError: when every private-use code point has been issued.
        """
        offset = self.next_id
        for first, last in self._SYMBOL_RANGES:
            size = last - first + 1
            if offset < size:
                self.next_id += 1
                return chr(first + offset)
            offset -= size
        raise OverflowError("Private-use symbol space exhausted")

    def get_symbol(self, expr):
        """Return the symbol for ``expr`` (keyed by ``expr.hash()``), creating it if new."""
        key = expr.hash()
        with self.lock:
            if key in self.expr_to_symbol:
                return self.expr_to_symbol[key]
            sym = self._generate_symbol()
            self.symbol_to_expr[sym] = expr
            self.expr_to_symbol[key] = sym
            return sym

    def get_expr(self, symbol):
        """Return the expression registered for ``symbol``, or ``None``."""
        return self.symbol_to_expr.get(symbol, None)


# --- Expression Node (Recursive Tree with Custom Operators) ---
class ExpressionNode:
    """Operator node whose children are nested nodes or raw tokens."""

    def __init__(self, op, children):
        self.op = op
        self.children = children

    def serialize(self):
        """Return a JSON-friendly dict of this subtree."""
        return {"op": self.op, "children": [c.serialize() if isinstance(c, ExpressionNode) else c for c in self.children]}

    def hash(self):
        """Return the SHA-256 hex digest of the key-sorted JSON serialization."""
        return hashlib.sha256(json.dumps(self.serialize(), sort_keys=True).encode()).hexdigest()

    def __repr__(self):
        return f"({self.op} {' '.join(map(str, self.children))})"


# --- Expression Builder (Expanded Grammar) ---
class ExpressionBuilder:
    """Builds an expression tree whose leaves, read left to right, are the input tokens."""

    def __init__(self):
        self.operators = ['SEQ', 'SUM', 'MUL', 'POW', 'DIV', 'XOR']  # Expandable

    def build_expr(self, tokens):
        """Combine ``tokens`` into a single tree.

        Args:
            tokens: non-empty list of tokens.

        Returns:
            The token itself for a single-token list, otherwise an
            ``ExpressionNode``. Operators are assigned round-robin and carry
            no meaning beyond labelling the node.

        Raises:
            ValueError: if ``tokens`` is empty.
        """
        if not tokens:
            raise ValueError("Cannot build an expression from an empty token list")
        if len(tokens) == 1:
            return tokens[0]

        ops = self.operators
        nodes = []
        for i, start in enumerate(range(0, len(tokens), 2)):
            chunk = tokens[start:start + 2]
            if len(chunk) == 2:
                nodes.append(ExpressionNode(ops[i % len(ops)], chunk))
            else:
                # Odd-length input: keep the trailing token as a raw leaf
                # instead of silently discarding it.
                nodes.append(chunk[0])

        while len(nodes) > 1:
            # Pop right first, then left, so the children keep source order.
            right = nodes.pop()
            left = nodes.pop()
            nodes.append(ExpressionNode(ops[len(nodes) % len(ops)], [left, right]))
        return nodes[0]


# --- Codec Interface ---
class RecursiveSymbolicCodec:
    """Encodes strings to dictionary symbols and decodes them back."""

    def __init__(self):
        self.symbols = SymbolDictionary()
        self.builder = ExpressionBuilder()

    def encode(self, string):
        """Return the symbol registered for the expression tree of ``string``."""
        tokens = list(string)
        if len(tokens) < 2:
            # The builder hands back a bare token for one character and has
            # nothing to build for none; the dictionary needs a node with a
            # hash() method, so wrap these cases explicitly.
            expr_tree = ExpressionNode('SEQ', tokens)
        else:
            expr_tree = self.builder.build_expr(tokens)
        return self.symbols.get_symbol(expr_tree)

    def decode(self, symbol):
        """Return the string stored under ``symbol``.

        Raises:
            ValueError: if ``symbol`` is not in the dictionary.
        """
        expr = self.symbols.get_expr(symbol)
        if expr is None:
            raise ValueError("Unknown symbol")
        return self.eval_expr(expr)

    def eval_expr(self, expr):
        """Concatenate the leaves of ``expr`` from left to right.

        The operator labels are ignored; only the tree order matters.
        """
        if isinstance(expr, str):
            return expr
        return ''.join(self.eval_expr(child) for child in expr.children)


# --- Example Usage ---
if __name__ == '__main__':
    # Encode a sample string, then decode it with the same codec instance.
    codec = RecursiveSymbolicCodec()

    s = "HELLO WORLD"
    symbol = codec.encode(s)
    print(f"Encoded Symbol: {symbol}")
    recovered = codec.decode(symbol)
    print(f"Recovered: {recovered}")

Parser Module

# parser.py

import re
from typing import List, Union

# Node classes for our expression tree
class ExprNode:
    """Interior parse-tree node: an operator name applied to argument nodes."""

    def __init__(self, operator: str, args: List['ExprNode']):
        self.operator = operator
        self.args = args

    def __repr__(self):
        rendered = [str(arg) for arg in self.args]
        return "{}({})".format(self.operator, ', '.join(rendered))

class LeafNode:
    """Parse-tree leaf wrapping a single literal value."""

    def __init__(self, value: Union[int, str]):
        self.value = value

    def __repr__(self):
        return "{}".format(self.value)

# Tokenizer for parsing input expression strings
class Tokenizer:
    """Splits expression text into identifiers, integers and ``( ) ,`` tokens.

    Characters that match none of those token kinds are skipped.
    """

    def __init__(self, text: str):
        self.tokens = re.findall(r'[A-Za-z_][A-Za-z0-9_]*|\d+|[(),]', text)
        self.position = 0

    def next(self):
        """Consume and return the current token, or ``None`` at end of input."""
        tok = self.peek()
        if tok is not None:
            self.position += 1
        return tok

    def peek(self):
        """Return the current token without consuming it, or ``None`` at end of input."""
        return self.tokens[self.position] if self.position < len(self.tokens) else None

    def expect(self, value):
        """Consume one token and raise ``SyntaxError`` unless it equals ``value``."""
        tok = self.next()
        if tok == value:
            return
        raise SyntaxError(f"Expected '{value}', got '{tok}'")

# Recursive descent parser
class ExpressionParser:
    """Recursive-descent parser for ``name(arg, arg, ...)`` expressions."""

    def __init__(self, tokenizer: 'Tokenizer'):
        self.tokenizer = tokenizer

    def parse_expr(self):
        """Parse one expression: an integer leaf or ``name(expr, ...)``.

        Returns:
            A ``LeafNode`` for an integer token, otherwise an ``ExprNode``.

        Raises:
            SyntaxError: on end of input, on punctuation where an expression
                must start, or on a missing parenthesis.
        """
        token = self.tokenizer.peek()
        if token is None:
            # Without this guard re.match() receives None and raises TypeError.
            raise SyntaxError("Unexpected end of input")
        if token in ('(', ')', ','):
            # Punctuation must not be accepted as an operator name.
            raise SyntaxError(f"Unexpected token '{token}'")

        if re.match(r'\d+', token):  # Leaf integer
            return LeafNode(int(self.tokenizer.next()))

        name = self.tokenizer.next()
        self.tokenizer.expect('(')
        args = self.parse_expr_list()
        self.tokenizer.expect(')')
        return ExprNode(name, args)

    def parse_expr_list(self):
        """Parse one or more comma-separated expressions and return them as a list."""
        args = [self.parse_expr()]
        while self.tokenizer.peek() == ',':
            self.tokenizer.next()
            args.append(self.parse_expr())
        return args

# Example usage (unit test style)
if __name__ == '__main__':
    # Parse a nested sample expression and print the resulting tree.
    test_string = "add(mul(2,3), pow(2,3))"
    tokenizer = Tokenizer(test_string)
    parser = ExpressionParser(tokenizer)
    tree = parser.parse_expr()
    print("Parsed Expression Tree:", tree)

Recursive Encoder #3

# recursive_encoder.py

from parser_module import ExpressionParser
from symbol_dict import SymbolDictionary
from expression_tree import ExpressionNode
import hashlib

class RecursiveEncoder:
    """Maps strings to dictionary symbols and back via expression trees."""

    def __init__(self, symbol_dict=None):
        self.parser = ExpressionParser()
        # A falsy ``symbol_dict`` (including ``None``) is replaced by a fresh one.
        self.symbol_dict = symbol_dict if symbol_dict else SymbolDictionary()

    def encode_string(self, input_string):
        """Return the symbol assigned to the optimized expression of ``input_string``."""
        # Steps 1-2: build the tree from the string, then optimize it.
        tree = ExpressionNode.from_string(input_string).optimize()
        # Steps 3-4: serialize the tree and look up / assign its symbol.
        return self.symbol_dict.get_or_assign_symbol(tree.to_expression())

    def decode_symbol(self, symbol):
        """Return the evaluated expression stored under ``symbol``.

        Raises:
            ValueError: if the dictionary returns a falsy expression for ``symbol``.
        """
        expression_str = self.symbol_dict.get_expression(symbol)
        if expression_str:
            # Parse the stored text and evaluate the tree.
            return self.parser.parse(expression_str).evaluate()
        raise ValueError("Unknown symbol")


# Example usage
if __name__ == '__main__':
    # Encode a sample string and decode the resulting symbol again.
    encoder = RecursiveEncoder()
    original = "HELLO WORLD"
    symbol = encoder.encode_string(original)
    print("Encoded Symbol:", symbol)

    decoded = encoder.decode_symbol(symbol)
    print("Decoded String:", decoded)

Grammar Engine Compression

import hashlib
import itertools
from typing import Any, List, Dict, Tuple, Union

# -- Expression Node Definition -- #
class ExpressionNode:
    """Operator applied to operands that are nodes or plain values."""

    def __init__(self, operator: str, operands: List[Union['ExpressionNode', str, int]]):
        self.operator = operator
        self.operands = operands

    def serialize(self) -> str:
        """Return ``operator(a,b,...)``; a ``SymbolNode`` serializes to its symbol."""
        if isinstance(self, SymbolNode):
            return self.symbol
        parts = []
        for operand in self.operands:
            if isinstance(operand, ExpressionNode):
                parts.append(operand.serialize())
            else:
                parts.append(str(operand))
        return f"{self.operator}({','.join(parts)})"

    def fingerprint(self) -> str:
        """Return the SHA-256 hex digest of the serialized form."""
        digest = hashlib.sha256(self.serialize().encode())
        return digest.hexdigest()

    def __repr__(self):
        return self.serialize()

# -- Symbol Node (For Leaf Compression) -- #
class SymbolNode(ExpressionNode):
    """Leaf node that stands for a dictionary symbol."""

    def __init__(self, symbol: str):
        # The base initializer is not called; all three attributes are set here.
        self.operator = 'symbol'
        self.operands = []
        self.symbol = symbol

    def serialize(self) -> str:
        """Return the symbol text itself."""
        return self.symbol

# -- Symbol Dictionary with Subtree Reuse -- #
class SymbolDictionary:
    """Assigns one symbol per distinct expression fingerprint."""

    def __init__(self):
        self.expr_to_symbol: Dict[str, str] = {}
        self.symbol_to_expr: Dict[str, ExpressionNode] = {}
        self.next_id = 0

    def _gen_symbol(self) -> str:
        """Return the next ``𝕊<n>`` symbol and advance the counter."""
        # Extendable: map to unicode, base4096, or multichar tokens
        index = self.next_id
        self.next_id = index + 1
        return f"𝕊{index}"

    def get_or_create_symbol(self, node: ExpressionNode) -> SymbolNode:
        """Return a ``SymbolNode`` for ``node``, registering ``node`` if unseen."""
        fid = node.fingerprint()
        sym = self.expr_to_symbol.get(fid)
        if sym is None:
            sym = self._gen_symbol()
            self.expr_to_symbol[fid] = sym
            self.symbol_to_expr[sym] = node
        return SymbolNode(sym)

    def resolve(self, symbol: str) -> ExpressionNode:
        """Return the node registered for ``symbol``; ``KeyError`` if unknown."""
        return self.symbol_to_expr[symbol]

# -- Grammar Engine with Operator Expansion -- #
class GrammarEngine:
    """Folds text into a chain of symbol-compressed ``+`` nodes and back."""

    def __init__(self):
        self.symbols = SymbolDictionary()

    def build_expression(self, text: str) -> ExpressionNode:
        """Return the expression for ``text``.

        Characters become ``char`` nodes and are combined left to right;
        every combination is replaced by its dictionary symbol. Empty text
        yields an empty ``+`` node, which decodes to ``""``.
        """
        # Naive tokenizer: real impl would be recursive and grammar-aware
        if not text:
            return ExpressionNode('+', [])
        token_nodes = (ExpressionNode('char', [ord(c)]) for c in text)
        # Left fold with an accumulator: same tree as repeatedly combining the
        # first two list items, without the O(n) pop(0)/insert(0) per step.
        result = next(token_nodes)
        for node in token_nodes:
            combined = ExpressionNode('+', [result, node])
            result = self.symbols.get_or_create_symbol(combined)
        return result

    def decode_expression(self, node: Union[SymbolNode, ExpressionNode]) -> str:
        """Return the text represented by ``node``, resolving symbols recursively."""
        if isinstance(node, SymbolNode):
            resolved = self.symbols.resolve(node.symbol)
            return self.decode_expression(resolved)
        elif node.operator == 'char':
            return chr(node.operands[0])
        else:
            return ''.join(self.decode_expression(op) if isinstance(op, ExpressionNode) else str(op)
                           for op in node.operands)

# -- Example Usage -- #
if __name__ == "__main__":
    # Build the expression for a sample string, then decode it again.
    engine = GrammarEngine()
    
    input_str = "HELLO"
    expr = engine.build_expression(input_str)
    print("Encoded Expression:", expr)

    output_str = engine.decode_expression(expr)
    print("Decoded:", output_str)

Recursive Codec Grammar

import hashlib
import json
import threading
from collections import defaultdict, namedtuple

# Define expression node
# NOTE(review): not referenced by any code visible in this section.
tree_id_counter = 0

class ExprNode:
    """Immutable-by-convention expression node with a precomputed content hash.

    Attributes are ``op`` (operator name), ``args`` (child nodes or literal
    values) and ``hash`` (hex SHA-256 of the subtree, computed once in
    ``__init__``).
    """

    def __init__(self, op, args):
        self.op = op
        self.args = args
        self.hash = self.compute_hash()

    @staticmethod
    def _feed(hasher, tag, payload):
        """Hash one field as ``tag + length + ':' + payload``."""
        hasher.update(tag + str(len(payload)).encode() + b':' + payload)

    def compute_hash(self):
        """Return the hex SHA-256 digest identifying this subtree.

        Each field is tagged and length-prefixed. Plain concatenation would
        give ``('lit', ['ab'])`` and ``('lit', ['a', 'b'])`` the same digest
        and so the same dictionary symbol.
        """
        h = hashlib.sha256()
        self._feed(h, b'O', self.op.encode())
        for a in self.args:
            if isinstance(a, ExprNode):
                self._feed(h, b'N', a.hash.encode())
            else:
                self._feed(h, b'L', str(a).encode())
        return h.hexdigest()

    def __repr__(self):
        return f"({self.op} {' '.join(map(str, self.args))})"

# Symbol dictionary for compression
class SymbolDictionary:
    """Two-way map between expression hashes and ``§<n>§`` symbols."""

    def __init__(self):
        self.expr_to_symbol = {}
        self.symbol_to_expr = {}
        self.counter = 0

    def get_symbol(self, expr):
        """Return the symbol for ``expr`` (keyed by ``expr.hash``), creating it if new."""
        key = expr.hash
        existing = self.expr_to_symbol.get(key)
        if existing is not None:
            return existing
        symbol = f"§{self.counter}§"
        self.counter += 1
        self.expr_to_symbol[key] = symbol
        self.symbol_to_expr[symbol] = expr
        return symbol

    def decode_symbol(self, symbol):
        """Return the expression registered for ``symbol``, or ``None``."""
        return self.symbol_to_expr.get(symbol)

# Grammar and parser
# NOTE(review): the encoder/decoder below only use the 'lit' and 'concat' ops.
OPERATORS = ['+', '*', '^', 'wrap', 'concat']

# Recursive encoder
class RecursiveEncoder:
    """Encodes a string as the symbol of a balanced ``concat`` tree."""

    def __init__(self):
        self.symdict = SymbolDictionary()

    def encode_string(self, string):
        """Return the dictionary symbol for ``string``."""
        tree = self.build_expr_tree(list(string))
        return self.symdict.get_symbol(tree)

    def build_expr_tree(self, tokens):
        """Return a balanced tree whose ``lit`` leaves spell ``tokens`` in order.

        An empty token list becomes a single empty literal. Without that
        base case the halving below would recurse on ``[]`` forever.
        """
        if not tokens:
            return ExprNode('lit', [''])
        if len(tokens) == 1:
            return ExprNode('lit', [tokens[0]])
        # Splitting at the midpoint also covers the two-token case (mid == 1).
        mid = len(tokens) // 2
        left = self.build_expr_tree(tokens[:mid])
        right = self.build_expr_tree(tokens[mid:])
        return ExprNode('concat', [left, right])

# Decoder
class RecursiveDecoder:
    """Turns dictionary symbols back into the strings they encode."""

    def __init__(self, symdict):
        self.symdict = symdict

    def decode_symbol(self, symbol):
        """Return the string stored under ``symbol``.

        Raises:
            ValueError: if ``symbol`` is not in the dictionary, or its
                expression uses an operator the decoder does not know.
        """
        expr = self.symdict.decode_symbol(symbol)
        if expr is None:
            # The dictionary returns None for unknown symbols; report that
            # directly instead of failing later on ``None.op``.
            raise ValueError(f"Unknown symbol: {symbol}")
        return self.evaluate(expr)

    def evaluate(self, expr):
        """Evaluate a ``lit`` / ``concat`` tree to its string."""
        if expr.op == 'lit':
            return expr.args[0]
        elif expr.op == 'concat':
            return self.evaluate(expr.args[0]) + self.evaluate(expr.args[1])
        else:
            raise ValueError(f"Unknown op: {expr.op}")

# Example usage
encoder = RecursiveEncoder()
# The decoder shares the encoder's dictionary so it can resolve its symbols.
decoder = RecursiveDecoder(encoder.symdict)

encoded = encoder.encode_string("HELLO")
print("Encoded Symbol:", encoded)

decoded = decoder.decode_symbol(encoded)
print("Decoded String:", decoded)

Recursive Encoder #4

import hashlib
import json
from typing import Any, Tuple, Union, Dict

# --- Operator Generator: maps expression signatures to symbols ---
class OperatorGenerator:
    """Hands out one single-character symbol per distinct signature."""

    def __init__(self):
        self.operators: Dict[str, str] = {}  # signature -> symbol
        self.symbol_to_expr: Dict[str, str] = {}  # reverse lookup for decoding
        self.next_symbol_code = 0xE000  # Start in Private Use Area (can go multi-char later)

    def get_operator(self, signature: str) -> str:
        """Return the symbol for ``signature``, allocating one on first use."""
        cached = self.operators.get(signature)
        if cached is not None:
            return cached
        symbol = self._generate_new_symbol()
        self.operators[signature] = symbol
        self.symbol_to_expr[symbol] = signature
        return symbol

    def _generate_new_symbol(self) -> str:
        """Return the character at the current code point and advance by one."""
        code = self.next_symbol_code
        self.next_symbol_code = code + 1
        return chr(code)

# --- Expression signature (hashable form of expression) ---
def expr_signature(expr: Any) -> str:
    """Return the SHA-256 hex digest of ``expr`` serialized as key-sorted JSON."""
    payload = json.dumps(expr, sort_keys=True).encode()
    digest = hashlib.sha256(payload)
    return digest.hexdigest()

# --- Encoder ---
def encode_expression(expr: Any, op_gen: OperatorGenerator) -> str:
    """Return ``expr`` unchanged if it is a string, otherwise the symbol of its signature."""
    if not isinstance(expr, str):
        return op_gen.get_operator(expr_signature(expr))
    return expr

# --- Tokenization & Expression building (simple placeholder logic) ---
def tokenize_string(s: str) -> list:
    """Return the characters of ``s`` as a list."""
    return [*s]

def build_expression(tokens: list) -> Any:
    """Fold ``tokens`` into a left-nested ``("concat", left, right)`` tuple.

    Args:
        tokens: list of tokens.

    Returns:
        ``""`` for an empty list (the original guard ``len(tokens) <= 1``
        still indexed ``tokens[0]`` and raised ``IndexError``), the token
        itself for a single token, otherwise nested ``concat`` tuples.
    """
    if not tokens:
        return ""
    expr = tokens[0]
    for token in tokens[1:]:
        expr = ("concat", expr, token)
    return expr

# --- Single-layer encoding chunk ---
def encode_chunk(input_string: str, op_gen: OperatorGenerator) -> Tuple[str, Any]:
    """Encode ``input_string`` once; return ``(symbol, expression tree)``."""
    expr_tree = build_expression(tokenize_string(input_string))
    return encode_expression(expr_tree, op_gen), expr_tree

# --- Recursive Encoding ---
def recursive_encode(input_string: str, max_depth=5, current_depth=0, op_gen=None) -> str:
    """Encode ``input_string`` repeatedly until it is short or ``max_depth`` is hit.

    Every level encodes the current string once. Recursion stops when
    ``current_depth`` equals ``max_depth`` or the string being encoded has at
    most four characters. A new ``OperatorGenerator`` is created when none is
    passed in.
    """
    generator = OperatorGenerator() if op_gen is None else op_gen
    is_last_level = current_depth == max_depth or len(input_string) <= 4
    encoded, _ = encode_chunk(input_string, generator)
    if is_last_level:
        return encoded
    return recursive_encode(encoded, max_depth, current_depth + 1, generator)

# --- Decoder ---
def decode_symbol(symbol: str, op_gen: OperatorGenerator) -> str:
    """Return a placeholder description of ``symbol``.

    Yields ``"?"`` when the symbol has no (truthy) signature on record;
    otherwise a tag showing the first six characters of that signature.
    The original string is not recovered.
    """
    sig = op_gen.symbol_to_expr.get(symbol)
    if sig:
        # In a real system, you would parse and evaluate the signature to recover the original string
        return f"[EXPR:{sig[:6]}...]"  # placeholder
    return "?"

# --- Demo ---
if __name__ == "__main__":
    # Encode a sample string; decoding only prints a placeholder tag.
    test_string = "HELLO WORLD"
    op_gen = OperatorGenerator()
    encoded = recursive_encode(test_string, max_depth=5, op_gen=op_gen)
    print(f"Encoded: {encoded}")
    print(f"Decoded (symbolic): {decode_symbol(encoded, op_gen)}")

Recursive Encoder #5

import hashlib
import itertools
import json
from collections import defaultdict
from functools import lru_cache

# === Symbol Dictionary (Persistent Mapping) ===
# signature -> symbol
symbol_table = {}
# symbol -> expression (filled by encode_expr)
reverse_symbol_table = {}
# Source of the integer that get_next_symbol() turns into a symbol.
symbol_counter = itertools.count(start=0)

# Extended alphabet base for symbol generation
SYMBOL_BASE_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!@#$%^&*()-_=+[]{};:,.<>?/|\\'

# === Safe Persistent Store ===
def save_symbol_table(path="symbol_table.json"):
    """Write the module-level ``symbol_table`` to ``path`` as JSON."""
    with open(path, "w") as handle:
        json.dump(symbol_table, handle)

def load_symbol_table(path="symbol_table.json"):
    """Load ``symbol_table`` from ``path``; a missing file is ignored.

    After a successful load the symbol counter is moved past the loaded
    entries, so newly generated symbols do not repeat ones already in the
    table. This assumes the table was produced by this module, which issues
    one counter value per entry.

    NOTE(review): only signatures are persisted, so after loading
    ``reverse_symbol_table`` maps symbols to signature strings rather than
    expression nodes; ``decode`` cannot evaluate those entries.
    """
    global symbol_table, reverse_symbol_table, symbol_counter
    try:
        with open(path) as f:
            loaded = json.load(f)
    except FileNotFoundError:
        return
    symbol_table = loaded
    reverse_symbol_table = {v: k for k, v in loaded.items()}
    symbol_counter = itertools.count(start=len(loaded))

# === Symbol Generator ===
def get_next_symbol():
    """Return the next counter value written in base ``len(SYMBOL_BASE_ALPHABET)``.

    Digits are alphabet characters, most significant first; zero maps to the
    first alphabet character.
    """
    n = next(symbol_counter)
    base = len(SYMBOL_BASE_ALPHABET)
    digits = []
    while True:
        n, remainder = divmod(n, base)
        digits.append(SYMBOL_BASE_ALPHABET[remainder])
        if n == 0:
            break
    return ''.join(reversed(digits))

# === Recursive Expression Tree Builder ===
class ExprNode:
    """Operator node with child nodes or literal values as arguments."""

    def __init__(self, op, args):
        self.op = op
        self.args = args

    def __str__(self):
        rendered = ', '.join(str(arg) for arg in self.args)
        return f"{self.op}({rendered})"

    def serialize(self):
        """Return ``(op, (arg, ...))`` with nested nodes serialized recursively."""
        parts = []
        for arg in self.args:
            parts.append(arg.serialize() if isinstance(arg, ExprNode) else arg)
        return (self.op, tuple(parts))

    def signature(self):
        """Return the SHA-256 hex digest of ``str(self.serialize())``."""
        text = str(self.serialize())
        return hashlib.sha256(text.encode()).hexdigest()

# === Tokenizer ===
def tokenize(text):
    """Return the items of ``text`` (its characters, for a string) as a list."""
    return [*text]

# === Expression Generator ===
def build_expr(tokens):
    """Return a balanced ``concat`` tree over ``tokens``.

    ``None`` for no tokens, a ``lit`` node for one token, otherwise a
    ``concat`` of the trees for the two halves.
    """
    count = len(tokens)
    if count == 0:
        return None
    if count == 1:
        return ExprNode('lit', [tokens[0]])

    # Try compression heuristic: binary split
    mid = count // 2
    halves = [build_expr(tokens[:mid]), build_expr(tokens[mid:])]
    return ExprNode('concat', halves)

# === Encoder ===
def encode(text):
    """Return the symbol for ``text``.

    Empty text encodes to the empty string. It has no expression tree
    (``build_expr`` yields ``None`` for it), and ``decode("")`` already
    returns ``""``, so the round trip holds.
    """
    if not text:
        return ""
    tokens = tokenize(text)
    expr = build_expr(tokens)
    return encode_expr(expr)

def encode_expr(expr):
    """Return the symbol for ``expr``, allocating one for an unseen signature.

    Deduplication is done through ``symbol_table``, keyed by the expression's
    signature. No ``lru_cache`` is applied: the nodes define no ``__eq__`` or
    ``__hash__``, so such a cache would key on object identity, never hit for
    freshly built trees, and keep every tree alive.
    """
    sig = expr.signature()
    if sig in symbol_table:
        return symbol_table[sig]

    symbol = get_next_symbol()
    symbol_table[sig] = symbol
    reverse_symbol_table[symbol] = expr
    return symbol

# === Decoder ===
def decode(symbol):
    """Return the text for ``symbol``, or ``""`` when no (truthy) expression is stored."""
    expr = reverse_symbol_table.get(symbol)
    return evaluate_expr(expr) if expr else ""

def evaluate_expr(expr):
    """Evaluate a ``lit`` / ``concat`` tree to its string; other ops give ``""``."""
    op = expr.op
    if op == 'lit':
        return expr.args[0]
    if op != 'concat':
        return ''
    return ''.join(map(evaluate_expr, expr.args))

# === Save/Load on init ===
# Runs at import time; a missing symbol_table.json is ignored by the loader.
load_symbol_table()

# === Example usage ===
if __name__ == "__main__":
    # Encode and decode a sample, then persist the signature table.
    test = "HELLO WORLD"
    sym = encode(test)
    print(f"Encoded: {sym}")
    print(f"Decoded: {decode(sym)}")
    save_symbol_table()

Recursive Encoder #6

import hashlib
import threading
from concurrent.futures import ThreadPoolExecutor
from collections import defaultdict
import itertools
import json

# Symbolic Dictionary Manager with Persistence
class SymbolDictionary:
    """Lock-protected, JSON-persistable map between expressions and symbols."""

    # The BMP Private Use Area spans U+E000..U+F8FF, i.e. 0x1900 code points.
    _PUA_FIRST = 0xE000
    _PUA_SIZE = 0x1900

    def __init__(self):
        self.expr_to_symbol = {}
        self.symbol_to_expr = {}
        self.next_symbol_index = 0
        self.lock = threading.Lock()

    def get_symbol(self, expr):
        """Return the symbol for ``expr`` (keyed by its sorted JSON), creating it if new."""
        key = json.dumps(expr, sort_keys=True)
        with self.lock:
            if key not in self.expr_to_symbol:
                symbol = self._generate_symbol(self.next_symbol_index)
                self.expr_to_symbol[key] = symbol
                self.symbol_to_expr[symbol] = expr
                self.next_symbol_index += 1
            return self.expr_to_symbol[key]

    def get_expression(self, symbol):
        """Return the expression registered for ``symbol``, or ``None``."""
        return self.symbol_to_expr.get(symbol)

    def _generate_symbol(self, index):
        """Return the symbol for the ``index``-th entry.

        Indices that fit the Private Use Area map to a single character;
        larger indices fall back to the multi-character form ``<SYM{index}>``
        so symbols never land on assigned Unicode characters.
        """
        if index < self._PUA_SIZE:
            return chr(self._PUA_FIRST + index)
        return f'<SYM{index}>'

    def save(self, filename):
        """Write both mappings to ``filename`` as JSON."""
        with self.lock:
            snapshot = {'expr_to_symbol': self.expr_to_symbol, 'symbol_to_expr': self.symbol_to_expr}
            with open(filename, 'w') as f:
                json.dump(snapshot, f)

    def load(self, filename):
        """Replace both mappings with the contents of ``filename``.

        The next symbol index is set to the number of loaded symbols, so
        symbols created after a load do not collide with loaded ones.
        """
        with open(filename, 'r') as f:
            data = json.load(f)
        with self.lock:
            self.expr_to_symbol = data['expr_to_symbol']
            self.symbol_to_expr = data['symbol_to_expr']
            self.next_symbol_index = len(self.symbol_to_expr)

# Recursive Expression Encoding
class ExpressionNode:
    """Tree node: a ``'val'`` leaf carrying ``value``, or an operator over ``children``."""

    def __init__(self, op, children=None, value=None):
        self.op = op
        # Any falsy ``children`` (None or an empty list) becomes a new empty list.
        self.children = children or []
        self.value = value

    def to_dict(self):
        """Return ``{'val': value}`` for a leaf, else ``{'op': ..., 'args': [...]}``."""
        if self.op != 'val':
            return {'op': self.op, 'args': [child.to_dict() for child in self.children]}
        return {'val': self.value}

    def __repr__(self):
        if not self.children:
            return str(self.value)
        rendered = ', '.join(map(str, self.children))
        return f"{self.op}({rendered})"

# Tokenizer with frequency compression prep
def tokenize_string(s):
    """Return the characters of ``s`` as a list."""
    return [*s]  # Can extend to n-grams or entropy-reducing segmentations

def build_expression(tokens):
    """Return a balanced ``add`` tree whose ``val`` leaves hold the tokens' code points.

    An empty token list yields an ``add`` node with no children, which
    serializes to ``{'op': 'add', 'args': []}``. Without that base case the
    halving below would recurse on ``[]`` until the recursion limit.
    """
    # Naive balanced nesting + operator alternation
    if not tokens:
        return ExpressionNode('add', [])
    if len(tokens) == 1:
        return ExpressionNode('val', value=ord(tokens[0]))
    mid = len(tokens) // 2
    left = build_expression(tokens[:mid])
    right = build_expression(tokens[mid:])
    return ExpressionNode('add', [left, right])  # Change operator heuristically

def compress_expression(expr, subtree_map):
    """Return ``expr`` rebuilt so that identical subtrees share one node object.

    ``subtree_map`` maps the sorted-JSON form of a subtree to the node first
    built for it, and is updated in place. ``val`` leaves are returned as-is
    and never stored.
    """
    key = json.dumps(expr.to_dict(), sort_keys=True)
    if key in subtree_map:
        return subtree_map[key]
    if expr.op == 'val':
        return expr
    shared_children = []
    for child in expr.children:
        shared_children.append(compress_expression(child, subtree_map))
    rebuilt = ExpressionNode(expr.op, shared_children)
    subtree_map[key] = rebuilt
    return rebuilt

def encode_expression(expr, symdict):
    """Return the symbol ``symdict`` assigns to the dict form of ``expr``."""
    as_dict = expr.to_dict()
    return symdict.get_symbol(as_dict)

def decode_symbol(symbol, symdict):
    """Look up ``symbol`` in ``symdict`` and evaluate the stored expression dict."""
    return eval_expression_dict(symdict.get_expression(symbol))

def eval_expression_dict(d):
    """Evaluate a serialized expression dict to a string.

    A ``{'val': n}`` leaf yields ``chr(n)``. Any operator node, ``add``
    included, yields the concatenation of its evaluated arguments.
    """
    if 'val' in d:
        return chr(d['val'])
    op = d['op']  # looked up first so a node without 'op' still raises KeyError
    pieces = [eval_expression_dict(arg) for arg in d['args']]
    # Extend for other ops; every operator currently concatenates.
    return ''.join(pieces)

# Parallel encoding
# Module-level dictionary shared by the encode and decode pipelines below.
symdict = SymbolDictionary()

def full_encode_pipeline(s):
    """Tokenize, build, compress and register ``s``; return its symbol.

    Each call uses a fresh subtree map and the module-level ``symdict``.
    """
    tree = build_expression(tokenize_string(s))
    return encode_expression(compress_expression(tree, {}), symdict)

def full_decode_pipeline(symbol):
    """Decode ``symbol`` using the module-level ``symdict``."""
    decoded = decode_symbol(symbol, symdict)
    return decoded

def parallel_encode(s, chunk_size=512):
    """Encode ``s`` in ``chunk_size``-character pieces on a thread pool.

    Returns the list of symbols in chunk order.
    """
    starts = range(0, len(s), chunk_size)
    chunks = [s[start:start + chunk_size] for start in starts]
    with ThreadPoolExecutor() as executor:
        return list(executor.map(full_encode_pipeline, chunks))

# Example
if __name__ == '__main__':
    # Encode a sample in chunks, decode each symbol, and join the pieces.
    test_string = "HELLO WORLD, THIS IS A TEST OF THE RECURSIVE ENCODER!"
    encoded = parallel_encode(test_string)
    print("Encoded Symbols:", encoded)
    decoded = ''.join([full_decode_pipeline(sym) for sym in encoded])
    print("Decoded String:", decoded)

Recursive Encoder Decoder

import hashlib
import math
import threading
import json
import uuid
from collections import defaultdict

# === Symbol Dictionary ===
# str(expression) -> symbol
symbol_dict = {}
# symbol -> expression
reverse_symbol_dict = {}
# Guards both dictionaries inside generate_symbol().
lock = threading.Lock()

# Symbol generator: a bracketed prefix of the SHA-256 digest of the expression
def generate_symbol(expression):
    """Return the symbol for ``expression``, registering it on first use.

    The symbol is ``⟦`` + the first 8 hex digits of the SHA-256 of
    ``str(expression)`` + ``⟧``. If that symbol already belongs to a
    different expression, the prefix is lengthened one digit at a time until
    it is unused, so an existing reverse mapping is never overwritten.
    """
    key = str(expression)
    with lock:
        if key in symbol_dict:
            return symbol_dict[key]
        digest = hashlib.sha256(key.encode()).hexdigest()
        length = 8
        symbol = "⟦" + digest[:length] + "⟧"
        while symbol in reverse_symbol_dict and length < len(digest):
            length += 1
            symbol = "⟦" + digest[:length] + "⟧"
        symbol_dict[key] = symbol
        reverse_symbol_dict[symbol] = expression
        return symbol

# === Expression Builder ===
def build_expression(tokens):
    """Combine ``tokens`` into one nested ``sum(...)`` / ``mul(...)`` string.

    One token is returned unchanged, two become ``sum(a,b)``, and longer
    lists are split in half and joined with ``mul(left,right)``.

    Raises:
        ValueError: if ``tokens`` is empty. Without this guard the halving
            step would recurse on ``[]`` until the recursion limit.
    """
    if not tokens:
        raise ValueError("Cannot build an expression from an empty token list")
    if len(tokens) == 1:
        return tokens[0]
    if len(tokens) == 2:
        return f"sum({tokens[0]},{tokens[1]})"
    mid = len(tokens) // 2
    left = build_expression(tokens[:mid])
    right = build_expression(tokens[mid:])
    return f"mul({left},{right})"

# === Tokenizer ===
def tokenize_string(s):
    """Return one ``char^count`` token per distinct character, sorted by character."""
    counts = {}
    for ch in s:
        counts[ch] = counts.get(ch, 0) + 1
    return ["%s^%d" % (ch, counts[ch]) for ch in sorted(counts)]

# === Encoder ===
def encode_string(s):
    """Return the symbol for the frequency expression of ``s``."""
    expr = build_expression(tokenize_string(s))
    return generate_symbol(expr)

# === Safe Expression Evaluator ===
def evaluate_expression(expr):
    """Evaluate an expression string to an integer score.

    ``sum(a,b)`` adds and ``mul(a,b)`` multiplies its two evaluated
    arguments. A ``c^n`` token scores ``ord(c) * n``; any other text is
    passed to ``ord``.

    NOTE(review): tokens for the characters ``,``, ``(`` and ``)`` still
    confuse the argument splitter; the expression format has no escaping.
    """
    if expr.startswith("sum("):
        inner = expr[4:-1]
        a, b = map(evaluate_expression, split_args(inner))
        return a + b
    if expr.startswith("mul("):
        inner = expr[4:-1]
        a, b = map(evaluate_expression, split_args(inner))
        return a * b
    # Split on the LAST caret so the token for the caret character itself
    # ("^^2") parses; a plain split("^") yields three parts for it.
    char, caret, count = expr.rpartition("^")
    if caret and char and count.isdigit():
        return ord(char) * int(count)
    return ord(expr)

def split_args(inner):
    """Split ``inner`` on commas that are not inside parentheses.

    Always returns at least one item; an empty input gives ``[""]``.
    """
    depth = 0
    parts = []
    buffer = []
    for ch in inner:
        if ch == "," and depth == 0:
            parts.append("".join(buffer))
            buffer = []
            continue
        if ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
        buffer.append(ch)
    parts.append("".join(buffer))
    return parts

# === Decoder ===
def decode_symbol(symbol):
    """Return the numeric score of the expression stored under ``symbol``.

    Raises:
        ValueError: if no (truthy) expression is stored for ``symbol``.
    """
    expr = reverse_symbol_dict.get(symbol)
    if expr:
        return evaluate_expression(expr)
    raise ValueError("Unknown symbol")

# === Save/Load Dictionary ===
def save_dictionary(filename="symbol_dict.json"):
    """Write the module-level ``symbol_dict`` to ``filename`` as JSON."""
    with open(filename, "w") as handle:
        json.dump(symbol_dict, handle)

def load_dictionary(filename="symbol_dict.json"):
    """Replace ``symbol_dict`` with the JSON in ``filename`` and rebuild the reverse map."""
    global symbol_dict, reverse_symbol_dict
    with open(filename, "r") as handle:
        symbol_dict = json.load(handle)
        reverse_symbol_dict = {symbol: key for key, symbol in symbol_dict.items()}

# === Example Usage ===
if __name__ == "__main__":
    # Decoding yields a numeric score, not the original text.
    sample = "HELLO WORLD"
    symbol = encode_string(sample)
    print(f"Symbol for '{sample}': {symbol}")
    print(f"Decoded (numerical score): {decode_symbol(symbol)}")

Megc Encoder Deploy

# main.py - Entrypoint for MEGC Recursive Encoder/Decoder

from encoder import encode_string
from decoder import decode_symbol
from dictionary import SymbolDictionary
import argparse

# Persistent dictionary instance
dictionary = SymbolDictionary('symbol_map.json')

# Command-line interface with two sub-commands; the chosen one is stored in args.command.
parser = argparse.ArgumentParser(description="MEGC: Mapped Entropic Golden Codec")
subparsers = parser.add_subparsers(dest='command')

# Encode Command
encode_parser = subparsers.add_parser('encode')
encode_parser.add_argument('input', type=str, help='Input string to encode')

# Decode Command
decode_parser = subparsers.add_parser('decode')
decode_parser.add_argument('symbol', type=str, help='Symbol to decode')

# NOTE: arguments are parsed at module level, i.e. whenever this file is executed or imported.
args = parser.parse_args()

if args.command == 'encode':
    symbol = encode_string(args.input, dictionary)
    print(f"Encoded Symbol: {symbol}")

elif args.command == 'decode':
    result = decode_symbol(args.symbol, dictionary)
    print(f"Decoded String: {result}")
else:
    # No sub-command given: show usage.
    parser.print_help()

Meg Codec Main #1

# meg_codec_main.py
# Entry point for MEGC - Mapped Entropic Golden Codec

from encoder import RecursiveEncoder
from decoder import RecursiveDecoder
from dictionary import SymbolDictionary

# Initialize persistent symbol dictionary
symbol_dict = SymbolDictionary(persist_file="megcodec_symbols.json")

# Instantiate encoder and decoder with the same dictionary
# (the functions below use these module-level instances)
encoder = RecursiveEncoder(symbol_dict)
decoder = RecursiveDecoder(symbol_dict)

def encode_string(input_string):
    """Encode ``input_string`` with the module-level encoder, printing input and result."""
    print("Encoding:", input_string)
    result = encoder.encode(input_string)
    print("Encoded:", result)
    return result

def decode_string(encoded_symbol):
    """Decode ``encoded_symbol`` with the module-level decoder, printing input and result."""
    print("Decoding symbol:", encoded_symbol)
    result = decoder.decode(encoded_symbol)
    print("Decoded string:", result)
    return result

if __name__ == "__main__":
    # Example usage: encode, decode, and report whether the round trip matched.
    original = "HELLO WORLD, THIS IS A TEST OF THE RECURSIVE ENCODER!"
    encoded = encode_string(original)
    decoded = decode_string(encoded)

    if decoded == original:
        print("✅ Roundtrip successful")
    else:
        print("❌ Mismatch!")

Meg Codec Main #2

import os
import pickle
from encoder import RecursiveEncoder
from decoder import RecursiveDecoder
from dictionary import SymbolDictionary

# Configuration
# Path handed to SymbolDictionary below.
DICTIONARY_PATH = "symbol_dict.pkl"

# Initialize persistent symbol dictionary
dictionary = SymbolDictionary(DICTIONARY_PATH)

# Create encoder and decoder with shared symbol dictionary
# (the functions below use these module-level instances)
encoder = RecursiveEncoder(dictionary)
decoder = RecursiveDecoder(dictionary)

def encode_text_to_symbol(text: str) -> str:
    """Build, compress and symbolize the expression tree for ``text``."""
    tree = encoder.build_expression_tree(text)
    return encoder.tree_to_symbol(encoder.compress_tree(tree))

def decode_symbol_to_text(symbol: str) -> str:
    """Resolve ``symbol`` to its expression and evaluate it back to text."""
    return decoder.evaluate_expression(decoder.symbol_to_expression(symbol))

def encode_file(input_path: str, output_path: str):
    """Encode the UTF-8 text in ``input_path`` and write its symbol to ``output_path``.

    The dictionary is saved afterwards.
    """
    with open(input_path, 'r', encoding='utf-8') as source:
        contents = source.read()
    encoded = encode_text_to_symbol(contents)
    with open(output_path, 'w', encoding='utf-8') as target:
        target.write(encoded)
    dictionary.save()  # Save updated symbol mappings

def decode_file(input_path: str, output_path: str):
    """Decode the symbol stored in ``input_path`` and write the text to ``output_path``.

    The file content is used as the symbol exactly as read. The dictionary
    is saved afterwards.
    """
    with open(input_path, 'r', encoding='utf-8') as source:
        stored_symbol = source.read()
    decoded = decode_symbol_to_text(stored_symbol)
    with open(output_path, 'w', encoding='utf-8') as target:
        target.write(decoded)
    dictionary.save()

if __name__ == '__main__':
    # CLI: <mode> <input file> <output file>, where mode is encode or decode.
    import argparse
    parser = argparse.ArgumentParser(description="MEGC Symbolic Codec")
    parser.add_argument('mode', choices=['encode', 'decode'])
    parser.add_argument('input', help="Input file path")
    parser.add_argument('output', help="Output file path")
    args = parser.parse_args()

    if args.mode == 'encode':
        encode_file(args.input, args.output)
    elif args.mode == 'decode':
        decode_file(args.input, args.output)

Josef_Founder · June 15, 2025, 5:25pm