Source code for uniqc.compile.qasm.qasm_base_parser

"""OpenQASM 2.0 base parser module.

This module provides the base parser for OpenQASM 2.0 quantum circuit representation,
including parsing qreg/creg definitions, quantum operations, and measurement statements.

Key exports:
    OpenQASM2_BaseParser: Base parser class for OpenQASM 2.0 circuits.
"""

__all__ = ["OpenQASM2_BaseParser"]

from uniqc.circuit_builder.qcircuit import Circuit
from uniqc.circuit_builder.translate_qasm2_oir import get_opcode_from_QASM2
from uniqc.compile.originir.originir_base_parser import OriginIR_BaseParser

from ._safe_eval import safe_eval_param
from .exceptions import NotSupportedGateError, RegisterDefinitionError, RegisterNotFoundError, RegisterOutOfRangeError
from .qasm_line_parser import OpenQASM2_LineParser


class OpenQASM2_BaseParser:
    """Parser for OpenQASM 2.0 quantum circuit representation.

    Attributes:
        qregs: List of quantum register tuples (name, size).
        cregs: List of classical register tuples (name, size).
        n_qubit: Total number of qubits.
        n_cbit: Total number of classical bits.
        program_body: List of operation opcodes.
        measure_qubits: List of measurement tuples (qubit, cbit).
    """

    # Maximum nesting depth tolerated while inlining user-defined gates.
    _MAX_INLINE_DEPTH = 32

    def __init__(self):
        self.qregs = []
        self.cregs = []
        self.n_qubit = None
        self.n_cbit = None
        self.program_body = []  # contains the opcodes

        self.raw_qasm = None
        self.formatted_qasm = None

        # for qasm statement collection
        self.collected_qregs_str = []
        self.collected_cregs_str = []
        self.collected_measurements_str = []
        self.program_body_str = []  # contains strs of the program body

        # for measurement mapping
        self.measure_qubits: list[tuple[int, int]] = []

        # user-defined gates: name -> (params, qubit args, body statements)
        self._gate_defs: dict[str, tuple[list[str], list[str], list[str]]] = {}

    def _format_and_check(self):
        """Format the original qasm code and check if it is valid.

        As of v0.0.11.dev23, user-defined ``gate NAME(PARAMS) ARGS { BODY }``
        blocks are collected into ``self._gate_defs`` here and removed from
        the source; ``parse`` later expands each call to such a gate through
        ``_expand_custom_gate_call``. ``opaque`` declarations are stripped
        (there is no body to inline). ``if`` statements are not supported.

        A canonical format of qasm code is like:

            OPENQASM 2.0;
            include "qelib1.inc";
            qreg q[n];
            creg c[m];
            (program body)

        Rules applied to build the formatted qasm code:

        1. ``//`` comments are removed before anything else, so a comment
           can neither hide the statement that follows it nor be mistaken
           for code.
        2. ``OPENQASM`` and ``include`` statements are removed.
        3. ``gate ... { ... }`` definitions are recorded, then stripped.
        4. qreg definitions are collected together; likewise creg
           definitions.
        5. The program body is statement-wise, separated by semicolons.
        6. ``measure`` statements are collected separately and emitted
           after the program body.
        7. Every other statement (barriers included) stays in the program
           body.

        Returns:
            A tuple ``(formatted_qasm, qreg_statements, creg_statements,
            program_body_statements, measurement_statements)``.

        Raises:
            ValueError: If ``self.raw_qasm`` has not been set.
            NotSupportedGateError: If the source contains an ``if (``
                statement.
        """
        if self.raw_qasm is None:
            raise ValueError("No raw qasm code provided.")

        import re

        # Drop line comments up front. Splitting on ';' first would glue a
        # comment to the next statement and discard both.
        source = re.sub(r"//[^\n]*", "", self.raw_qasm)

        # Supported gate definition shapes:
        #   gate NAME(P1, P2, ...) A1, A2, ... { BODY }
        #   gate NAME A1, A2, ... { BODY }
        gate_def_re = re.compile(
            r"\bgate\s+([A-Za-z_][A-Za-z0-9_]*)"  # 1: gate name
            r"(?:\s*\(([^)]*)\))?"  # 2: optional params
            r"\s+([A-Za-z_][A-Za-z0-9_,\s]*)"  # 3: qubit args
            r"\s*\{([^{}]*)\}",  # 4: body
            flags=re.MULTILINE | re.DOTALL,
        )

        gate_defs: dict[str, tuple[list[str], list[str], list[str]]] = {}
        for definition in gate_def_re.finditer(source):
            name = definition.group(1).strip()
            params = [p.strip() for p in (definition.group(2) or "").split(",") if p.strip()]
            args = [a.strip() for a in definition.group(3).split(",") if a.strip()]
            body_lines = [s.strip() for s in definition.group(4).split(";") if s.strip()]
            gate_defs[name] = (params, args, body_lines)
        self._gate_defs = gate_defs  # exposed for tests / introspection

        # Strip gate-def blocks and opaque declarations now that they have
        # been collected.
        source = gate_def_re.sub("", source)
        source = re.sub(
            r"\bopaque\b\s+[A-Za-z_][A-Za-z0-9_]*"
            r"(?:\s*\([^)]*\))?\s*[^;{]*;",
            "",
            source,
            flags=re.MULTILINE | re.DOTALL,
        )

        # Word-boundary match avoids false positives on identifiers that
        # merely contain "if".
        if re.search(r"\bif\s*\(", source):
            raise NotSupportedGateError("If statements are not supported yet.")

        collected_qregs = []
        collected_cregs = []
        collected_measurements = []
        program_body = []

        for statement in source.split(";"):
            statement = statement.strip()
            if not statement or statement.startswith(("include", "OPENQASM")):
                continue
            if statement.startswith("qreg"):
                collected_qregs.append(statement)
            elif statement.startswith("creg"):
                collected_cregs.append(statement)
            elif statement.startswith("measure"):
                collected_measurements.append(statement)
            else:
                program_body.append(statement)

        ret_qasm = "{};\n{};\n{};\n{};".format(
            ";\n".join(collected_qregs),
            ";\n".join(collected_cregs),
            ";\n".join(program_body),
            ";\n".join(collected_measurements),
        )

        return ret_qasm, collected_qregs, collected_cregs, program_body, collected_measurements

    @staticmethod
    def _split_outside_parens(text: str, sep: str = ",") -> list[str]:
        """Split ``text`` on ``sep`` while respecting parenthesized groups.

        Separators nested inside ``(...)`` do not split. Each part is
        stripped; an empty trailing part is dropped.
        """
        parts: list[str] = []
        buf: list[str] = []
        depth = 0
        for ch in text:
            if ch == sep and depth == 0:
                parts.append("".join(buf).strip())
                buf = []
                continue
            if ch == "(":
                depth += 1
            elif ch == ")":
                depth -= 1
            buf.append(ch)
        tail = "".join(buf).strip()
        if tail:
            parts.append(tail)
        return parts

    @staticmethod
    def _eval_param_expr(expr: str, bindings: dict[str, str]) -> str:
        """Evaluate a QASM parameter expression with symbolic substitutions.

        ``bindings`` maps parameter names (from a gate def signature) to the
        argument expressions supplied at the call site. Each bound name is
        replaced by its expression wrapped in parentheses so operator
        precedence is preserved, then the result is handed to
        ``safe_eval_param``.

        Returns:
            ``repr(float(value))`` when evaluation succeeds, otherwise the
            substituted expression text.
        """
        import re as _re

        if bindings:
            # Single pass over the original text: a substituted expression
            # is never rescanned, so it cannot be rewritten by another
            # binding. Longest names first so ``theta`` wins over ``th``.
            names = sorted(bindings, key=len, reverse=True)
            pattern = r"\b(?:" + "|".join(_re.escape(n) for n in names) + r")\b"
            expr = _re.sub(pattern, lambda m: f"({bindings[m.group(0)]})", expr)

        try:
            value = safe_eval_param(expr)
        except ValueError:
            # Could not collapse to a number; return the substituted
            # expression so downstream parsing reports the problem with
            # full context.
            return expr
        return repr(float(value))

    def _expand_custom_gate_call(self, line: str, depth: int = 0) -> list[str]:
        """Expand a single program-body line into primitive lines.

        If ``line`` is a call to a user-defined gate, recursively inline its
        body with parameter / qubit-arg substitution. Otherwise return
        ``[line]`` unchanged.

        Args:
            line: One statement of the program body (no trailing ``;``).
            depth: Current inlining depth; guards against self-referential
                gate definitions.

        Raises:
            NotImplementedError: If the depth limit is exceeded, or the call
                supplies the wrong number of parameters or qubit arguments.
        """
        import re as _re

        if depth > self._MAX_INLINE_DEPTH:
            raise NotImplementedError(
                f"Custom gate inlining exceeded recursion depth ({self._MAX_INLINE_DEPTH}). "
                "Likely a self-referential gate definition."
            )

        gate_defs = getattr(self, "_gate_defs", {})
        if not gate_defs:
            return [line]

        # Match: name(params) args   OR   name args
        call = _re.match(
            r"\s*([A-Za-z_][A-Za-z0-9_]*)"
            r"(?:\s*\(([^)]*)\))?"
            r"\s+(.+)$",
            line,
        )
        if call is None or call.group(1) not in gate_defs:
            return [line]

        name = call.group(1)
        param_expr_str = (call.group(2) or "").strip()
        arg_str = call.group(3).strip()
        params, qubit_args, body_lines = gate_defs[name]

        call_param_exprs = self._split_outside_parens(param_expr_str) if param_expr_str else []
        call_args = self._split_outside_parens(arg_str)

        if len(call_param_exprs) != len(params):
            raise NotImplementedError(f"Gate '{name}' expects {len(params)} parameters, got {len(call_param_exprs)}")
        if len(call_args) != len(qubit_args):
            raise NotImplementedError(f"Gate '{name}' expects {len(qubit_args)} qubit arguments, got {len(call_args)}")

        param_bindings = dict(zip(params, call_param_exprs))
        arg_bindings = dict(zip(qubit_args, call_args))

        def _replace_paren(match):
            # Evaluate every comma-separated expression inside one (...).
            expr_parts = self._split_outside_parens(match.group(1))
            evaluated = [self._eval_param_expr(p, param_bindings) for p in expr_parts]
            return "(" + ", ".join(evaluated) + ")"

        # One combined pattern so all formal qubit names are replaced in a
        # single pass. Replacing them one by one corrupts the line when an
        # actual argument uses a register named like another formal.
        arg_pattern = None
        if arg_bindings:
            formals = sorted(arg_bindings, key=len, reverse=True)
            arg_pattern = _re.compile(r"\b(?:" + "|".join(_re.escape(f) for f in formals) + r")\b")

        expanded: list[str] = []
        for body_line in body_lines:
            new_line = _re.sub(r"\(([^()]*)\)", _replace_paren, body_line)
            if arg_pattern is not None:
                new_line = arg_pattern.sub(lambda m: arg_bindings[m.group(0)], new_line)
            # Recurse: the body may itself reference other custom gates.
            expanded.extend(self._expand_custom_gate_call(new_line, depth=depth + 1))
        return expanded

    @staticmethod
    def _compute_id(regs, reg_name, reg_id):
        """Map ``reg_name[reg_id]`` to a flat index across ``regs``.

        ``regs`` is an ordered list of ``(name, size)`` tuples; the flat
        index is the sum of the sizes of the registers that precede
        ``reg_name`` plus ``reg_id``.

        Raises:
            RegisterOutOfRangeError: If ``reg_id`` is negative or not
                smaller than the register size.
            RegisterNotFoundError: If no register is named ``reg_name``.
        """
        offset = 0
        for stored_reg_name, stored_reg_size in regs:
            if stored_reg_name == reg_name:
                # A negative index would otherwise resolve to a bit that
                # belongs to an earlier register.
                if reg_id < 0 or reg_id >= stored_reg_size:
                    raise RegisterOutOfRangeError()
                return offset + reg_id
            offset += stored_reg_size

        raise RegisterNotFoundError()

    def _get_qubit_id(self, qreg_name, qreg_id):
        """Return the flat qubit index for ``qreg_name[qreg_id]``."""
        try:
            return OpenQASM2_BaseParser._compute_id(self.qregs, qreg_name, qreg_id)
        except RegisterNotFoundError as e:
            raise RegisterNotFoundError(f"Cannot find qreg {qreg_name}, (defined = {self.collected_qregs_str})") from e
        except RegisterOutOfRangeError as e:
            raise RegisterOutOfRangeError(f"qreg {qreg_name}[{qreg_id}] out of range.") from e

    def _get_cbit_id(self, creg_name, creg_id):
        """Return the flat classical-bit index for ``creg_name[creg_id]``."""
        try:
            return OpenQASM2_BaseParser._compute_id(self.cregs, creg_name, creg_id)
        except RegisterNotFoundError as e:
            raise RegisterNotFoundError(f"Cannot find creg {creg_name}, (defined = {self.collected_cregs_str})") from e
        except RegisterOutOfRangeError as e:
            raise RegisterOutOfRangeError(f"creg {creg_name}[{creg_id}] out of range.") from e

    @staticmethod
    def _check_regs(collected_regs, reg_handler):
        """Parse register statements and reject empty or duplicate sets.

        Args:
            collected_regs: Register definition statements.
            reg_handler: Callable turning one statement into
                ``(name, size)``.

        Returns:
            ``(total_size, regs)`` where ``regs`` is the list of
            ``(name, size)`` tuples in definition order.

        Raises:
            RegisterDefinitionError: If no register is defined or a name is
                defined twice.
        """
        if not collected_regs:
            raise RegisterDefinitionError("Register is empty")

        names = set()
        regs = []
        total_size = 0
        for reg_str in collected_regs:
            name, size = reg_handler(reg_str)
            if name in names:
                raise RegisterDefinitionError("Duplicate name")
            names.add(name)
            regs.append((name, size))
            total_size += size

        return total_size, regs

    def _process_measurements(self):
        """Resolve collected measure statements into (qubit, cbit) pairs."""
        for measurement in self.collected_measurements_str:
            qreg_name, qreg_id, creg_name, creg_id = OpenQASM2_LineParser.handle_measure(measurement)
            qid = self._get_qubit_id(qreg_name, qreg_id)
            cid = self._get_cbit_id(creg_name, creg_id)
            self.measure_qubits.append((qid, cid))

    def parse(self, raw_qasm):
        """Parse an OpenQASM 2.0 string and populate internal state.

        Results of any previous ``parse`` call on this instance are
        discarded first, so the parser can be reused.

        Args:
            raw_qasm: OpenQASM 2.0 string to parse.

        Raises:
            RegisterDefinitionError: If qreg or creg definitions are missing
                or duplicated.
            NotImplementedError: If a statement cannot be converted into an
                opcode.
        """
        self.raw_qasm = raw_qasm

        # Start from a clean slate; both lists are appended to below.
        self.program_body = []
        self.measure_qubits = []

        # format, check validity, and keep the collected statements
        (
            self.formatted_qasm,
            self.collected_qregs_str,
            self.collected_cregs_str,
            self.program_body_str,
            self.collected_measurements_str,
        ) = self._format_and_check()

        # process the total number of qubits
        try:
            self.n_qubit, self.qregs = OpenQASM2_BaseParser._check_regs(
                self.collected_qregs_str, OpenQASM2_LineParser.handle_qreg
            )
        except RegisterDefinitionError as e:
            raise RegisterDefinitionError(f"QReg Definition Error.\nInternal error: \n{str(e)}") from e

        # process the total number of cbits
        try:
            self.n_cbit, self.cregs = OpenQASM2_BaseParser._check_regs(
                self.collected_cregs_str, OpenQASM2_LineParser.handle_creg
            )
        except RegisterDefinitionError as e:
            raise RegisterDefinitionError(f"CReg Definition Error.\nInternal error: \n{str(e)}") from e

        self._process_measurements()

        # Inline any user-defined-gate calls before per-line opcode lookup.
        expanded_body: list[str] = []
        for line in self.program_body_str:
            expanded_body.extend(self._expand_custom_gate_call(line))
        self.program_body_str = expanded_body

        for line in self.program_body_str:
            operation, qubits, cbits, parameters = OpenQASM2_LineParser.parse_line(line)
            if operation is None:
                continue

            # transform (regname, index) pairs into flat ids; register
            # validity is checked inside _get_qubit_id / _get_cbit_id
            if qubits:
                if isinstance(qubits, list):
                    qubits = [self._get_qubit_id(qubit[0], qubit[1]) for qubit in qubits]
                else:
                    qubits = self._get_qubit_id(qubits[0], qubits[1])

            if cbits:
                if isinstance(cbits, list):
                    cbits = [self._get_cbit_id(cbit[0], cbit[1]) for cbit in cbits]
                else:
                    cbits = self._get_cbit_id(cbits[0], cbits[1])

            # a single-element parameter list collapses to a scalar
            if parameters and isinstance(parameters, list) and len(parameters) == 1:
                parameters = parameters[0]

            opcode = get_opcode_from_QASM2(operation, qubits, cbits, parameters)

            # check that the opcode was converted correctly
            if opcode is None:
                raise NotImplementedError(
                    "Opcode is not converted correctly for "
                    f"line: {line}.\n"
                    f"operation: {operation}\n"
                    f"qubits: {qubits}\n"
                    f"cbits: {cbits}\n"
                    f"parameters: {parameters}"
                )

            self.program_body.append(opcode)

    def to_originir(self):
        """Convert parsed OpenQASM data to OriginIR string.

        Returns:
            str: OriginIR string representation, including any ``MEASURE``
            statements collected from the QASM source.
        """
        oir_parser = OriginIR_BaseParser()
        oir_parser.n_qubit = self.n_qubit
        oir_parser.n_cbit = self.n_cbit
        oir_parser.program_body = self.program_body
        # Without this, OriginIR output silently drops all measurements.
        oir_parser.measure_qubits = list(self.measure_qubits)
        return oir_parser.to_extended_originir()

    def to_circuit(self) -> "Circuit":
        """Build a ``Circuit`` from the parsed program.

        ``parse`` must have been called first; this method only reads
        ``self.program_body`` and ``self.measure_qubits``.

        Returns:
            A new ``Circuit`` holding every parsed opcode, followed by one
            ``measure`` call per recorded measurement, ordered by
            classical-bit id.
        """
        origincircuit = Circuit()

        for opcode in self.program_body:
            # each opcode tuple is passed positionally to add_gate
            origincircuit.add_gate(*opcode)

        # add measurements to the circuit, sorted by cbit id; only the
        # qubit index is passed to measure()
        for qubit, _cbit in sorted(self.measure_qubits, key=lambda pair: pair[1]):
            origincircuit.measure(qubit)

        return origincircuit