API reference

chemsynthcalc

Python package for calculating the masses of substances required for chemical synthesis directly from the reaction string. It includes solutions for all intermediate steps, including chemical formula parsing, molar mass calculation and reaction balancing with different matrix methods.

Example use

Let's say that we need to prepare 3 grams of YBCO by solid-state synthesis from respective carbonates. The reaction string will look something like this (to simplify, let's leave it without oxygen nonstoichiometry):

from chemsynthcalc import ChemicalReaction

reaction_string = "BaCO3 + Y2(CO3)3 + CuCO3 + O2 → YBa2Cu3O7 + CO2"

Now, we can create a chemical reaction object of the ChemicalReaction class, which will be used in the calculation. We need to specify the arguments for our particular case:

>>> reaction = ChemicalReaction(
    reaction = reaction_string, # our reaction string
    target = 0, # index of target compound in the product list
    target_mass = 3, # desired mass of target compound,
    mode = "balance" # mode of coefficients calculations,
)

Now, to perform the automatic calculation, all we need to do is to put:

>>> reaction.print_results(print_rounding_order=4)
# assuming that we use an analytical balance with 4-digit precision

And we get our output in the terminal:

initial reaction: BaCO3+Y2(CO3)3+CuCO3+O2→YBa2Cu3O7+CO2
reaction matrix:
 [[1. 0. 0. 0. 2. 0.]
 [1. 3. 1. 0. 0. 1.]
 [3. 9. 3. 2. 7. 2.]
 [0. 2. 0. 0. 1. 0.]
 [0. 0. 1. 0. 3. 0.]]
mode: balance
formulas: ['BaCO3', 'Y2(CO3)3', 'CuCO3', 'O2', 'YBa2Cu3O7', 'CO2']
coefficients: [8, 2, 12, 1, 4, 26]
normalized coefficients: [2, 0.5, 3, 0.25, 1, 6.5]
algorithm: inverse
is balanced: True
final reaction: 8BaCO3+2Y2(CO3)3+12CuCO3+O2→4YBa2Cu3O7+26CO2
final reaction normalized: 2BaCO3+0.5Y2(CO3)3+3CuCO3+0.25O2→YBa2Cu3O7+6.5CO2
molar masses: [197.335, 357.835676, 123.554, 31.998, 666.190838, 44.009]
target: YBa2Cu3O7
masses: [1.7773, 0.8057, 1.6692, 0.036, 3.0, 1.2882]
BaCO3: M = 197.3350 g/mol, m = 1.7773 g
Y2(CO3)3: M = 357.8357 g/mol, m = 0.8057 g
CuCO3: M = 123.5540 g/mol, m = 1.6692 g
O2: M = 31.9980 g/mol, m = 0.0360 g
YBa2Cu3O7: M = 666.1908 g/mol, m = 3.0000 g
CO2: M = 44.0090 g/mol, m = 1.2882 g

`balancer`

`Balancer`

Bases: BalancingAlgorithms

A class for balancing chemical equations automatically by different matrix methods.

Parameters:

Name	Type	Description	Default
`matrix`	`NDArray[float64]`	Reaction matrix	required
`separator_pos`	`int`	Position of the reaction separator (usually the separator is "=")	required
`round_precision`	`int`	Coefficients rounding precision	required
`intify`	`bool`	Determines whether the coefficients should be integers	`True`

Attributes:

Name	Type	Description
`coef_limit`	`int`	max integer coefficient for _intify_coefficients method

Source code in src/chemsynthcalc/balancer.py

class Balancer(BalancingAlgorithms):
    """
    A class for balancing chemical equations automatically by different matrix methods.

    Parameters:
        matrix (npt.NDArray[np.float64]): Reaction matrix
        separator_pos (int): Position of the reaction separator (usually the separator is "=")
        round_precision (int): Coefficients rounding precision
        intify (bool): Determines whether the coefficients should be integers

    Attributes:
        coef_limit (int): max integer coefficient for \
        [_intify_coefficients][chemsynthcalc.balancer.Balancer._intify_coefficients] method
    """

    def __init__(
        self,
        matrix: npt.NDArray[np.float64],
        separator_pos: int,
        round_precision: int,
        intify: bool = True,
    ) -> None:
        super().__init__(matrix, separator_pos)

        if round_precision > 0:
            self.round_precision: int = round_precision
        else:
            raise ValueError("precision <= 0")

        self.intify: bool = intify
        self.coef_limit: int = 1_000_000

    def __str__(self) -> str:
        return f"Balancer object for matrix \n {self.reaction_matrix}"

    def __repr__(self) -> str:
        return f"Balancer({self.reaction_matrix}, {self.separator_pos}, {self.round_precision}, {self.intify})"

    def _intify_coefficients(
        self, coefficients: list[float], limit: int
    ) -> list[float | int] | list[int]:
        """
        Reduce the coefficients to integers by finding the greatest common divider.

        Parameters:
            coefficients (list): List of coefficients to intify
            limit (int): Upper limit (max int coef)

        Returns:
            A list of intified coefficients
        """
        initial_coefficients = coefficients
        frac = [Fraction(x).limit_denominator() for x in coefficients]
        vals = [
            int(
                fr.numerator
                * find_lcm([fr.denominator for fr in frac])
                / fr.denominator
            )
            for fr in frac
        ]
        coefficients = [int(val / find_gcd(vals)) for val in vals]
        if any(x > limit for x in coefficients):
            return initial_coefficients
        return coefficients

    @staticmethod
    def is_reaction_balanced(
        reactant_matrix: npt.NDArray[np.float64],
        product_matrix: npt.NDArray[np.float64],
        coefficients: list[float] | list[int],
        tolerance: float = 1e-8,
    ) -> bool:
        """
        Checks if reaction is balanced by multiplying reactant matrix and product matrix
        by the respective coefficient vector. Method is static to call it outside of balancer
        instance.

        Parameters:
            reactant_matrix (npt.NDArray[np.float64]): Matrix of reactants property generated by [ChemicalReaction][chemsynthcalc.chemical_reaction.ChemicalReaction] class
            product_matrix (npt.NDArray[np.float64]): Matrix of products property generated by [ChemicalReaction][chemsynthcalc.chemical_reaction.ChemicalReaction] class
            coefficients (list[float] | list[int]): Coefficients
            tolerance (float): tolerance limit for the *np.allclose* function

        Returns:
            True if balanced within tolerance

        Examples:
            >>> reaction = ChemicalReaction("NH4ClO4+HNO3+HCl=HClO4+NOCl+N2O+N2O3+H2O+Cl2")
            >>> Balancer.is_reaction_balanced(reaction.reactant_matrix, reaction.product_matrix, [64, 167, 137, 80, 43, 64, 30, 240, 39])
            True
            >>> reaction = ChemicalReaction("H2+O2=H2O")
            >>> Balancer.is_reaction_balanced(reaction.reactant_matrix, reaction.product_matrix, [2,2,2])
            False
        """
        try:
            reactants = np.multiply(
                reactant_matrix.T,
                np.array(coefficients)[: reactant_matrix.shape[1], None],
            )
            products = np.multiply(
                product_matrix.T,
                np.array(coefficients)[reactant_matrix.shape[1] :, None],
            )
            return np.allclose(
                reactants.sum(axis=0), products.sum(axis=0), rtol=tolerance
            )

        except Exception:
            return False

    def _calculate_by_method(self, method: str) -> list[float | int] | list[int]:
        """
        Compute the coefficients list by a specific method.

        Parameters:
            method (str): One of 4 currently implemented methods (inv, gpinv, ppinv, comb)

        Returns:
            A list of coefficients

        Raise:
            ValueError if method is not found. <br />
            [BalancingError][chemsynthcalc.chem_errors.BalancingError] if can't balance reaction by specified method.
        """
        match method:

            case "inv":
                coefficients: list[float] = np.round(
                    self._inv_algorithm(), decimals=self.round_precision
                ).tolist()  # type: ignore

            case "gpinv":
                coefficients: list[float] = np.round(
                    self._gpinv_algorithm(), decimals=self.round_precision + 2
                ).tolist()  # type: ignore

            case "ppinv":
                coefficients: list[float] = np.round(
                    self._ppinv_algorithm(), decimals=self.round_precision + 2
                ).tolist()  # type: ignore

            case "comb":
                res: npt.NDArray[np.int32] | None = self._comb_algorithm()
                if res is not None:
                    return res.tolist()  # type: ignore
                else:
                    raise BalancingError(f"Can't balance reaction by {method} method")

            case _:
                raise ValueError(f"No method {method}")

        if (
            Balancer.is_reaction_balanced(
                self.reactant_matrix, self.product_matrix, coefficients
            )
            and all(x > 0 for x in coefficients)
            and len(coefficients) == self.reaction_matrix.shape[1]
        ):
            if self.intify:
                intified = self._intify_coefficients(coefficients, self.coef_limit)
                if all(x < self.coef_limit for x in intified):
                    return intified
                else:
                    return coefficients
            else:
                return coefficients
        else:
            raise BalancingError(f"Can't balance reaction by {method} method")

    def inv(self) -> list[float | int] | list[int]:
        """
        A high-level function call to compute coefficients by Thorne method.

        Returns:
            A list of coefficients
        """
        return self._calculate_by_method("inv")

    def gpinv(self) -> list[float | int] | list[int]:
        """
        A high-level function call to compute coefficients by
        Risteski general pseudoinverse method.

        Returns:
            A list of coefficients
        """
        return self._calculate_by_method("gpinv")

    def ppinv(self) -> list[float | int] | list[int]:
        """
        A high-level function call to compute coefficients by
        Risteski partial pseudoinverse method.

        Returns:
            A list of coefficients
        """
        return self._calculate_by_method("ppinv")

    def comb(self) -> list[float | int] | list[int]:
        """
        A high-level function call to compute coefficients by
        combinatorial method.

        Returns:
            A list of coefficients
        """
        return self._calculate_by_method("comb")

    def auto(self) -> tuple[list[float | int] | list[int], str]:
        """
        A high-level function call to automatically compute coefficients
        by sequentially calling inv, gpinv, ppinv methods.

        Returns:
            A list of coefficients

        Raise:
            [BalancingError][chemsynthcalc.chem_errors.BalancingError] if can't balance reaction by any method.
        """
        try:
            return (self.inv(), "inverse")
        except Exception:
            pass
        try:
            return (self.gpinv(), "general pseudoinverse")
        except Exception:
            pass
        try:
            return (self.gpinv(), "partial pseudoinverse")
        except Exception:
            raise BalancingError("Can't balance this reaction by any method")

`_intify_coefficients(coefficients, limit)`

Reduce the coefficients to integers by finding the greatest common divisor.

Parameters:

Name	Type	Description	Default
`coefficients`	`list`	List of coefficients to intify	required
`limit`	`int`	Upper limit (max int coef)	required

Returns:

Type	Description
`list[float \| int] \| list[int]`	A list of intified coefficients

Source code in src/chemsynthcalc/balancer.py

def _intify_coefficients(
    self, coefficients: list[float], limit: int
) -> list[float | int] | list[int]:
    """
    Reduce the coefficients to integers by scaling them to a common
    denominator and dividing by their greatest common divisor.

    Parameters:
        coefficients (list): List of coefficients to intify
        limit (int): Upper limit (max int coef)

    Returns:
        A list of intified coefficients, or the unchanged input list
        if any intified coefficient is greater than the limit
    """
    fractions = [Fraction(x).limit_denominator() for x in coefficients]
    if not fractions:
        # Nothing to scale; do not hand an empty list to the helpers.
        return []

    # The common denominator does not depend on the element, so it is
    # computed once instead of once per coefficient.
    # NOTE(review): assumes find_lcm returns the integer least common
    # multiple, so every denominator divides it exactly - confirm.
    common_denominator = find_lcm([fr.denominator for fr in fractions])
    # Pure integer arithmetic: float true division would lose precision
    # once the products no longer fit into a double.
    scaled = [
        int(fr.numerator * (common_denominator // fr.denominator))
        for fr in fractions
    ]
    # NOTE(review): assumes find_gcd returns the integer greatest common
    # divisor of the scaled values - confirm.
    common_divisor = find_gcd(scaled)
    intified = [int(val // common_divisor) for val in scaled]
    if any(x > limit for x in intified):
        return coefficients
    return intified

`is_reaction_balanced(reactant_matrix, product_matrix, coefficients, tolerance=1e-08)` `staticmethod`

Checks if reaction is balanced by multiplying reactant matrix and product matrix by the respective coefficient vector. Method is static to call it outside of balancer instance.

Parameters:

Name	Type	Description	Default
`reactant_matrix`	`NDArray[float64]`	Matrix of reactants property generated by ChemicalReaction class	required
`product_matrix`	`NDArray[float64]`	Matrix of products property generated by ChemicalReaction class	required
`coefficients`	`list[float] \| list[int]`	Coefficients	required
`tolerance`	`float`	tolerance limit for the np.allclose function	`1e-08`

Returns:

Type	Description
`bool`	True if balanced within tolerance

Examples:

>>> reaction = ChemicalReaction("NH4ClO4+HNO3+HCl=HClO4+NOCl+N2O+N2O3+H2O+Cl2")
>>> Balancer.is_reaction_balanced(reaction.reactant_matrix, reaction.product_matrix, [64, 167, 137, 80, 43, 64, 30, 240, 39])
True
>>> reaction = ChemicalReaction("H2+O2=H2O")
>>> Balancer.is_reaction_balanced(reaction.reactant_matrix, reaction.product_matrix, [2,2,2])
False

Source code in src/chemsynthcalc/balancer.py

@staticmethod
def is_reaction_balanced(
    reactant_matrix: npt.NDArray[np.float64],
    product_matrix: npt.NDArray[np.float64],
    coefficients: list[float] | list[int],
    tolerance: float = 1e-8,
) -> bool:
    """
    Checks if reaction is balanced by multiplying reactant matrix and product matrix
    by the respective coefficient vector. Method is static to call it outside of balancer
    instance.

    Parameters:
        reactant_matrix (npt.NDArray[np.float64]): Matrix of reactants property generated by [ChemicalReaction][chemsynthcalc.chemical_reaction.ChemicalReaction] class
        product_matrix (npt.NDArray[np.float64]): Matrix of products property generated by [ChemicalReaction][chemsynthcalc.chemical_reaction.ChemicalReaction] class
        coefficients (list[float] | list[int]): Coefficients
        tolerance (float): tolerance limit for the *np.allclose* function

    Returns:
        True if balanced within tolerance

    Examples:
        >>> reaction = ChemicalReaction("NH4ClO4+HNO3+HCl=HClO4+NOCl+N2O+N2O3+H2O+Cl2")
        >>> Balancer.is_reaction_balanced(reaction.reactant_matrix, reaction.product_matrix, [64, 167, 137, 80, 43, 64, 30, 240, 39])
        True
        >>> reaction = ChemicalReaction("H2+O2=H2O")
        >>> Balancer.is_reaction_balanced(reaction.reactant_matrix, reaction.product_matrix, [2,2,2])
        False
    """
    try:
        reactants = np.multiply(
            reactant_matrix.T,
            np.array(coefficients)[: reactant_matrix.shape[1], None],
        )
        products = np.multiply(
            product_matrix.T,
            np.array(coefficients)[reactant_matrix.shape[1] :, None],
        )
        return np.allclose(
            reactants.sum(axis=0), products.sum(axis=0), rtol=tolerance
        )

    except Exception:
        return False

`_calculate_by_method(method)`

Compute the coefficients list by a specific method.

Parameters:

Name	Type	Description	Default
`method`	`str`	One of 4 currently implemented methods (inv, gpinv, ppinv, comb)	required

Returns:

Type	Description
`list[float \| int] \| list[int]`	A list of coefficients

Raise

ValueError if method is not found.
BalancingError if can't balance reaction by specified method.

Source code in src/chemsynthcalc/balancer.py

def _calculate_by_method(self, method: str) -> list[float | int] | list[int]:
    """
    Run one balancing algorithm and post-process its coefficients.

    Parameters:
        method (str): One of 4 currently implemented methods (inv, gpinv, ppinv, comb)

    Returns:
        A list of coefficients

    Raise:
        ValueError if method is not found. <br />
        [BalancingError][chemsynthcalc.chem_errors.BalancingError] if can't balance reaction by specified method.
    """
    # The combinatorial search result is returned as is, without the
    # rounding, validation and intify steps applied to the other methods.
    if method == "comb":
        found: npt.NDArray[np.int32] | None = self._comb_algorithm()
        if found is None:
            raise BalancingError(f"Can't balance reaction by {method} method")
        return found.tolist()  # type: ignore

    # The pseudoinverse methods are rounded with two extra decimals.
    if method == "inv":
        raw = self._inv_algorithm()
        decimals = self.round_precision
    elif method == "gpinv":
        raw = self._gpinv_algorithm()
        decimals = self.round_precision + 2
    elif method == "ppinv":
        raw = self._ppinv_algorithm()
        decimals = self.round_precision + 2
    else:
        raise ValueError(f"No method {method}")

    coefficients: list[float] = np.round(raw, decimals=decimals).tolist()  # type: ignore

    # Accept only a balanced, strictly positive vector with one
    # coefficient per compound.
    accepted = (
        Balancer.is_reaction_balanced(
            self.reactant_matrix, self.product_matrix, coefficients
        )
        and all(x > 0 for x in coefficients)
        and len(coefficients) == self.reaction_matrix.shape[1]
    )
    if not accepted:
        raise BalancingError(f"Can't balance reaction by {method} method")

    if not self.intify:
        return coefficients

    intified = self._intify_coefficients(coefficients, self.coef_limit)
    if all(x < self.coef_limit for x in intified):
        return intified
    return coefficients

`inv()`

A high-level function call to compute coefficients by Thorne method.

Returns:

Type	Description
`list[float \| int] \| list[int]`	A list of coefficients

Source code in src/chemsynthcalc/balancer.py

def inv(self) -> list[float | int] | list[int]:
    """
    A high-level function call to compute coefficients by Thorne method.

    Returns:
        A list of coefficients
    """
    return self._calculate_by_method("inv")

`gpinv()`

A high-level function call to compute coefficients by Risteski general pseudoinverse method.

Returns:

Type	Description
`list[float \| int] \| list[int]`	A list of coefficients

Source code in src/chemsynthcalc/balancer.py

def gpinv(self) -> list[float | int] | list[int]:
    """
    A high-level function call to compute coefficients by
    Risteski general pseudoinverse method.

    Returns:
        A list of coefficients
    """
    return self._calculate_by_method("gpinv")

`ppinv()`

A high-level function call to compute coefficients by Risteski partial pseudoinverse method.

Returns:

Type	Description
`list[float \| int] \| list[int]`	A list of coefficients

Source code in src/chemsynthcalc/balancer.py

def ppinv(self) -> list[float | int] | list[int]:
    """
    A high-level function call to compute coefficients by
    Risteski partial pseudoinverse method.

    Returns:
        A list of coefficients
    """
    return self._calculate_by_method("ppinv")

`comb()`

A high-level function call to compute coefficients by combinatorial method.

Returns:

Type	Description
`list[float \| int] \| list[int]`	A list of coefficients

Source code in src/chemsynthcalc/balancer.py

def comb(self) -> list[float | int] | list[int]:
    """
    A high-level function call to compute coefficients by
    combinatorial method.

    Returns:
        A list of coefficients
    """
    return self._calculate_by_method("comb")

`auto()`

A high-level function call to automatically compute coefficients by sequentially calling inv, gpinv, ppinv methods.

Returns:

Type	Description
`tuple[list[float \| int] \| list[int], str]`	A tuple of the list of coefficients and the name of the algorithm that produced it

Raise

BalancingError if can't balance reaction by any method.

Source code in src/chemsynthcalc/balancer.py

def auto(self) -> tuple[list[float | int] | list[int], str]:
    """
    A high-level function call to automatically compute coefficients
    by sequentially calling inv, gpinv, ppinv methods.

    Returns:
        A list of coefficients

    Raise:
        [BalancingError][chemsynthcalc.chem_errors.BalancingError] if can't balance reaction by any method.
    """
    try:
        return (self.inv(), "inverse")
    except Exception:
        pass
    try:
        return (self.gpinv(), "general pseudoinverse")
    except Exception:
        pass
    try:
        return (self.gpinv(), "partial pseudoinverse")
    except Exception:
        raise BalancingError("Can't balance this reaction by any method")

`balancing_algos`

`BalancingAlgorithms`

A collection of functions for balancing chemical reactions

Currently implemented: Thorne algorithm (see _inv_algorithm method for details), Risteski general pseudo-inverse algorithm (see _gpinv_algorithm method for details), Risteski partial pseudo-inverse algorithm (see _ppinv_algorithm method for details), and naive combinational search algorithm (see _comb_algorithm method for details).

Parameters:

Name	Type	Description	Default
`matrix`	`NDArray[float64]`	Reaction matrix	required
`separator_pos`	`int`	Position of the reaction separator (usually the separator is "=")	required

Attributes:

Name	Type	Description
`reactant_matrix`	`NDArray[float64]`	A matrix of the left part of the equation
`product_matrix`	`NDArray[float64]`	A matrix of the right part of the equation

Note

Why use scipy.linalg.pinv, when numpy.linalg.pinv is doing the same thing and does not require the whole SciPy import?

There are some peculiar reaction cases (especially for the _ppinv_algorithm method) where the results of numpy.linalg.pinv differ from system to system (NumPy version, OS, Python version, etc.). My understanding is that this behaviour is caused by small differences in the pinv implementation between NumPy C libraries and BLAS libraries. To avoid this, the more consistent scipy.linalg.pinv is used instead.

Source code in src/chemsynthcalc/balancing_algos.py

class BalancingAlgorithms:
    """
    A collection of functions for balancing chemical reactions

    Currently implemented: Thorne algorithm (see
    [_inv_algorithm][chemsynthcalc.balancing_algos.BalancingAlgorithms._inv_algorithm] method for details),
    Risteski general pseudo-inverse algorithm (see
    [_gpinv_algorithm][chemsynthcalc.balancing_algos.BalancingAlgorithms._gpinv_algorithm] method for details),
    Risteski partial pseudo-inverse algorithm (see
    [_ppinv_algorithm][chemsynthcalc.balancing_algos.BalancingAlgorithms._ppinv_algorithm] method for details),
    and naive combinational search algorithm (see
    [_comb_algorithm][chemsynthcalc.balancing_algos.BalancingAlgorithms._comb_algorithm] method for details).

    Parameters:
        matrix (npt.NDArray[np.float64]): Reaction matrix
        separator_pos (int): Position of the reaction separator (usually the separator is "=")

    Attributes:
        reactant_matrix (npt.NDArray[np.float64]): A matrix of the left part of the equation
        product_matrix (npt.NDArray[np.float64]): A matrix of the right part of the equation

    Note:
        Why use
        [scipy.linalg.pinv](https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.pinv.html),
        when
        [numpy.linalg.pinv](https://numpy.org/doc/stable/reference/generated/numpy.linalg.pinv.html)
        is doing the same thing and does not require the whole SciPy import?

        There are some peculiar reaction cases where
        (especially for [_ppinv_algorithm][chemsynthcalc.balancing_algos.BalancingAlgorithms._ppinv_algorithm] method)
        the results for [numpy.linalg.pinv](https://numpy.org/doc/stable/reference/generated/numpy.linalg.pinv.html)
        differs from system to system (np version, OS, python version etc.). My understanding is that the cause of
        this behaviour lies in small differences for pinv algorithm in numpy C-libraries and BLAS-libraries,
        hence the difference.
        To avoid this, a more consistent method
        [scipy.linalg.pinv](https://docs.scipy.org/doc/scipy/reference/generated/scipy.linalg.pinv.html) was used.
    """

    def __init__(self, matrix: npt.NDArray[np.float64], separator_pos: int) -> None:
        self.separator_pos = separator_pos
        self.reaction_matrix: npt.NDArray[np.float64] = matrix
        self.reactant_matrix: npt.NDArray[np.float64] = self.reaction_matrix[
            :, : self.separator_pos
        ]
        self.product_matrix: npt.NDArray[np.float64] = self.reaction_matrix[
            :, self.separator_pos :
        ]

    def _inv_algorithm(self) -> npt.NDArray[np.float64]:
        """Matrix inverse algorithm for reaction balancing.

        A reaction matrix inverse algorithm proposed by [Thorne](https://arxiv.org/abs/1110.4321).
        The calculation is based on the nullity, or dimensionality, of the matrix.

        The algorithm can be described in steps:

        1) If the number of rows is greater than the number of columns, \
        add zero columns until the matrix becomes square \
        (Note: this is a modification of the original \
        Thorne method described in the article).

        2) If reaction matrix is square (which means that the number \
        of atoms involved is equal to the number of compounds) than \
        we turn matrix in its row-echelon form by singular value \
        decomposition.

        3) Calculation of the nullity of the matrix, which is \
        basically number of compounds minus rank of the matrix.

        4) Create a  matrix augumented by nullity number of rows \
        of flipped identity matrix. If any rows are zeros, \
        replace them with identity matrix rows.

        5) Inverse the augumented matrix.

        6) Exctract and transpose rightmost column.

        7) Normalize this value with the absolute min value of the vector.

        8) Round up float operations errors.

        The absolute values of this vector are coefficients of the
        reaction.

        Note:
            While this method works great for reactions with 0 and 1
            nullity, it generally cannot work with nullities 2 and higher.
            Thorne claims that for higher nullities, a nullity number
            of vectors should be extracted, and each of them contains
            a set of correct coefficients. However, if number of rows in
            the flipped augmentation identity matrix is 2 or more, one can
            easily see that each vector will contain nullity-1 zeroes,
            therefore they cannot be a correct vector of coefficients.

        Returns:
            A 1D NumPy array of calculated coefficients
        """
        reaction_matrix = self.reaction_matrix

        if reaction_matrix.shape[0] > reaction_matrix.shape[1]:
            zeros_added = reaction_matrix.shape[0] - reaction_matrix.shape[1]
            zero_columns = np.zeros(
                (
                    reaction_matrix.shape[0],
                    zeros_added,
                )
            )
            reaction_matrix = np.hstack((reaction_matrix, zero_columns))
        else:
            zeros_added = 0

        if reaction_matrix.shape[0] == reaction_matrix.shape[1]:
            _, _, reaction_matrix = np.linalg.svd(reaction_matrix)

        number_of_cols = reaction_matrix.shape[1]
        rank = np.linalg.matrix_rank(reaction_matrix, tol=1e-100)
        nullity = number_of_cols - rank
        augument = np.flip(np.identity(reaction_matrix.shape[1])[:nullity], axis=1)
        augumented_matrix = np.vstack((reaction_matrix, augument))
        if np.where(~augumented_matrix.any(axis=1))[0].size > 0:
            augumented_matrix = augumented_matrix[
                ~np.all(augumented_matrix == 0, axis=1)
            ]
        inversed_matrix = np.linalg.inv(augumented_matrix)
        vector = inversed_matrix[:, -zeros_added - 1].T
        vector = np.absolute(np.squeeze(np.asarray(vector)))
        vector = vector[vector != 0]
        coefs = np.divide(vector, vector.min())
        return coefs

    def _gpinv_algorithm(self) -> npt.NDArray[np.float64]:
        """Matrix gerenal pseudoinverse algorithm for reaction balancing.

        A reaction matrix pseudoinverse algorithm
        proposed by [Risteski](http://koreascience.or.kr/article/JAKO201314358624990.page).
        There are other articles and methods of chemical
        equation balancing by this author, however, this particular
        algorithm seems to be most convenient for matrix calculations.
        The algorithm can be described in steps:

        1) Stack reactant matrix and negative product matrix.

        2) Calculate MP pseudoinverse of this matrix.

        3) Calculate coefficients by formula:
        x = (I – A+A)a, where x is the coefficients vector,
        I - identity matrix, A+ - MP inverse, A - matrix,
        a - arbitrary vector (in this case, vector of ones).

        Note:
            This method is more general than Thorne's method, although it has some
            peculiarities of its own. First of all, the output of this method is float array,
            so, to generate an int coefs list, it needs to be converted, which is
            not always leads to a good result. Secondly, MP pseudoinverse
            is sensetive to row order in the reaction matrix. The rows should
            be ordered by atoms apperances in the reaction string.

        Returns:
            A 1D NumPy array of calculated coefficients
        """
        matrix = np.hstack((self.reactant_matrix, -self.product_matrix))
        inverse = scipy.linalg.pinv(matrix)
        a = np.ones((matrix.shape[1], 1))
        i = np.identity(matrix.shape[1])
        coefs = (i - inverse @ matrix) @ a
        return coefs.flat[:]

    def _ppinv_algorithm(self) -> npt.NDArray[np.float64]:
        """
        Matrix partial pseudoinverse algorithm for reaction balancing.

        A reaction matrix pseudoinverse algorithm also proposed by
        [Risteski](https://www.koreascience.or.kr/article/JAKO200802727293429.page).
        The method solves a Diophantine matrix equation by means of
        the Moore-Penrose (MP) pseudoinverse matrix.

        Notation: A is the reactant matrix, B is the product matrix,
        A^- is the MP pseudoinverse of A and I is an identity matrix.
        The algorithm can be described in steps:

        1) Take the MP pseudoinverse A^- of the reactant matrix.

        2) Create the G matrix in the form of (I-AA^-)B.

        3) The vector y (coefficients of products) is equal to
        (I-G^-G)u, where u is a vector of ones.

        4) The vector x (coefficients of reactants) is A^-By plus a
        term taken from (I-A^-A). This implementation adds A^-By to
        (I-A^-A) and uses the first row of the result.

        Note:
            While this algorithm and
            [_gpinv_algorithm][chemsynthcalc.balancing_algos.BalancingAlgorithms._gpinv_algorithm]
            are very similar, there are some differences in output results.
            This method exists mostly for legacy purposes, like balancing
            some reactions according to [Risteski](https://www.koreascience.or.kr/article/JAKO200802727293429.page).

        Returns:
            A 1D NumPy array of calculated coefficients
        """
        reactants = self.reactant_matrix
        products = self.product_matrix
        reactants_pinv = scipy.linalg.pinv(reactants)

        # G = (I - AA^-)B
        complement = (
            np.identity(self.reaction_matrix.shape[0]) - reactants @ reactants_pinv
        )
        g_matrix = complement @ products

        # y = (I - G^-G)u, u being a vector of ones
        g_projector = scipy.linalg.pinv(g_matrix) @ g_matrix
        n_products = g_projector.shape[1]
        products_coefs = (np.identity(n_products) - g_projector).dot(
            np.ones(n_products)
        )

        # x: A^-By is added to (I - A^-A) and the first row is kept
        a_projector = reactants_pinv @ reactants
        x_rows = (
            np.identity(a_projector.shape[1]) - a_projector
        ) + reactants_pinv @ products @ products_coefs.T
        reactants_coefs = x_rows[0].T

        return np.squeeze(np.asarray(np.hstack((reactants_coefs, products_coefs))))

    def _comb_algorithm(
        self, max_number_of_iterations: float = 1e8
    ) -> npt.NDArray[np.int32] | None:
        """
        Matrix combinatorial algorithm for reaction balancing.

        Finds a solution solution of a Diophantine matrix equation
        by simply enumerating of all possible solutions of number_of_iterations
        coefficients. The solution space is created by Cartesian product
        (in this case, *np.meshgrid* function), and therefore it is very
        limited by memory. There must a better, clever and fast solution
        to this!

        Important:
            Only for integer coefficients less than 128. Only for reactions
            with total compound count <=10.
            A GPU-accelerated version of this method can be done by importing
            CuPy and replacing np. with cp.

        Note:
            All possible variations of coefficients vectors are
            combinations = max_coefficients**number_of_compounds,
            therefore this method is most effective for reaction with
            small numbers of compounds.

        Returns:
            A 1D NumPy array of calculated coefficients of None if can't compute
        """
        byte = 127
        number_of_compounds = self.reaction_matrix.shape[1]
        if number_of_compounds > 10:
            raise ValueError("Sorry, this method is only for n of compound <=10")

        number_of_iterations = int(
            max_number_of_iterations ** (1 / number_of_compounds)
        )

        if number_of_iterations > byte:
            number_of_iterations = byte

        trans_reaction_matrix = (self.reaction_matrix).T
        lenght = self.reactant_matrix.shape[1]
        old_reactants = trans_reaction_matrix[:lenght].astype("ushort")
        old_products = trans_reaction_matrix[lenght:].astype("ushort")
        for i in range(2, number_of_iterations + 2):
            cart_array = (np.arange(1, i, dtype="ubyte"),) * number_of_compounds
            permuted = np.array(np.meshgrid(*cart_array), dtype="ubyte").T.reshape(
                -1, number_of_compounds
            )
            filter = np.asarray([i - 1], dtype="ubyte")
            permuted = permuted[np.any(permuted == filter, axis=1)]
            # print("calculating max coef %s of %s" % (i-1, number_of_iterations), end='\r', flush=False)
            reactants_vectors = permuted[:, :lenght]
            products_vectors = permuted[:, lenght:]
            del permuted
            reactants = (old_reactants[None, :, :] * reactants_vectors[:, :, None]).sum(
                axis=1
            )
            products = (old_products[None, :, :] * products_vectors[:, :, None]).sum(
                axis=1
            )
            diff = np.subtract(reactants, products)
            del reactants
            del products
            where = np.where(~diff.any(axis=1))[0]
            if np.any(where):
                if where.shape[0] == 1:
                    idx = where
                else:
                    idx = where[0]
                # print("")
                return np.array(
                    np.concatenate(
                        (
                            reactants_vectors[idx].flatten(),
                            products_vectors[idx].flatten(),
                        )
                    )
                )
            gc.collect()
        # print("")
        return None

`_inv_algorithm()`

Matrix inverse algorithm for reaction balancing.

A reaction matrix inverse algorithm proposed by Thorne. The calculation is based on the nullity, or dimensionality, of the matrix.

The algorithm can be described in steps:

1) If the number of rows is greater than the number of columns, add zero columns until the matrix becomes square (Note: this is a modification of the original Thorne method described in the article).

2) If the reaction matrix is square (which means that the number of atoms involved is equal to the number of compounds), then we turn the matrix into its row-echelon form by singular value decomposition.

3) Calculation of the nullity of the matrix, which is basically number of compounds minus rank of the matrix.

4) Create a matrix augmented by a nullity number of rows of the flipped identity matrix. If any rows are zeros, replace them with identity matrix rows.

5) Invert the augmented matrix.

6) Extract and transpose the rightmost column.

7) Normalize this value with the absolute min value of the vector.

8) Round up float operations errors.

The absolute values of this vector are coefficients of the reaction.

Note

While this method works great for reactions with 0 and 1 nullity, it generally cannot work with nullities 2 and higher. Thorne claims that for higher nullities, a nullity number of vectors should be extracted, and each of them contains a set of correct coefficients. However, if number of rows in the flipped augmentation identity matrix is 2 or more, one can easily see that each vector will contain nullity-1 zeroes, therefore they cannot be a correct vector of coefficients.

Returns:

Type	Description
`NDArray[float64]`	A 1D NumPy array of calculated coefficients

Source code in src/chemsynthcalc/balancing_algos.py

def _inv_algorithm(self) -> npt.NDArray[np.float64]:
    """Matrix inverse algorithm for reaction balancing.

    A reaction matrix inverse algorithm proposed by
    [Thorne](https://arxiv.org/abs/1110.4321). The calculation is
    based on the nullity, or dimensionality, of the matrix.

    The implementation performs the following steps:

    1) If the number of rows is greater than the number of columns,
    zero columns are appended until the matrix becomes square
    (Note: this is a modification of the original Thorne method
    described in the article).

    2) If the (possibly padded) matrix is square, it is replaced by
    the third factor returned by its singular value decomposition.

    3) The nullity is calculated as the number of columns minus the
    rank of the matrix.

    4) The matrix is augmented by a nullity number of rows of the
    flipped identity matrix, and rows consisting only of zeros are
    removed.

    5) The augmented matrix is inverted.

    6) The rightmost column (not counting the padding columns) is
    extracted and transposed.

    7) Absolute values are taken, zero entries are discarded and the
    vector is divided by its minimal value.

    The resulting vector contains the coefficients of the reaction.

    Note:
        While this method works great for reactions with 0 and 1
        nullity, it generally cannot work with nullities 2 and higher.
        Thorne claims that for higher nullities, a nullity number
        of vectors should be extracted, and each of them contains
        a set of correct coefficients. However, if the number of rows
        in the flipped augmentation identity matrix is 2 or more, one
        can easily see that each vector will contain nullity-1 zeroes,
        therefore they cannot be a correct vector of coefficients.

    Returns:
        A 1D NumPy array of calculated coefficients
    """
    matrix = self.reaction_matrix
    n_rows = matrix.shape[0]
    n_cols = matrix.shape[1]

    # Pad with zero columns when there are more rows than columns.
    zeros_added = 0
    if n_rows > n_cols:
        zeros_added = n_rows - n_cols
        matrix = np.hstack((matrix, np.zeros((n_rows, zeros_added))))

    if matrix.shape[0] == matrix.shape[1]:
        matrix = np.linalg.svd(matrix)[2]

    width = matrix.shape[1]
    nullity = width - np.linalg.matrix_rank(matrix, tol=1e-100)
    flipped_rows = np.flip(np.identity(width)[:nullity], axis=1)
    augmented = np.vstack((matrix, flipped_rows))

    # Drop all-zero rows, if there are any.
    if not augmented.any(axis=1).all():
        augmented = augmented[~np.all(augmented == 0, axis=1)]

    # Skip the padding columns when picking the rightmost column.
    column = np.linalg.inv(augmented)[:, -zeros_added - 1].T
    magnitudes = np.absolute(np.squeeze(np.asarray(column)))
    magnitudes = magnitudes[magnitudes != 0]
    return np.divide(magnitudes, magnitudes.min())

`_gpinv_algorithm()`

Matrix general pseudoinverse algorithm for reaction balancing.

A reaction matrix pseudoinverse algorithm proposed by Risteski. There are other articles and methods of chemical equation balancing by this author, however, this particular algorithm seems to be most convenient for matrix calculations. The algorithm can be described in steps:

1) Stack reactant matrix and negative product matrix.

2) Calculate MP pseudoinverse of this matrix.

3) Calculate coefficients by formula: x = (I – A+A)a, where x is the coefficients vector, I - identity matrix, A+ - MP inverse, A - matrix, a - arbitrary vector (in this case, vector of ones).

Note

This method is more general than Thorne's method, although it has some peculiarities of its own. First of all, the output of this method is a float array, so, to generate an int coefs list, it needs to be converted, which does not always lead to a good result. Secondly, the MP pseudoinverse is sensitive to row order in the reaction matrix. The rows should be ordered by atom appearances in the reaction string.

Returns:

Type	Description
`NDArray[float64]`	A 1D NumPy array of calculated coefficients

Source code in src/chemsynthcalc/balancing_algos.py

def _gpinv_algorithm(self) -> npt.NDArray[np.float64]:
    """Matrix general pseudoinverse algorithm for reaction balancing.

    A reaction matrix pseudoinverse algorithm
    proposed by [Risteski](http://koreascience.or.kr/article/JAKO201314358624990.page).
    There are other articles and methods of chemical
    equation balancing by this author, however, this particular
    algorithm seems to be most convenient for matrix calculations.
    The algorithm can be described in steps:

    1) Stack the reactant matrix and the negative product matrix.

    2) Calculate the MP pseudoinverse of this matrix.

    3) Calculate coefficients by the formula
    x = (I - A+A)a, where x is the coefficients vector,
    I - identity matrix, A+ - MP inverse, A - matrix,
    a - arbitrary vector (in this case, vector of ones).

    Note:
        This method is more general than Thorne's method, although it has
        some peculiarities of its own. First of all, the output of this
        method is a float array, so, to generate an int coefs list, it
        needs to be converted, which does not always lead to a good
        result. Secondly, the MP pseudoinverse is sensitive to row order
        in the reaction matrix. The rows should be ordered by atom
        appearances in the reaction string.

    Returns:
        A 1D NumPy array of calculated coefficients
    """
    stacked = np.hstack((self.reactant_matrix, -self.product_matrix))
    n_compounds = stacked.shape[1]
    pseudoinverse = scipy.linalg.pinv(stacked)
    ones_column = np.ones((n_compounds, 1))
    # (I - A+A) applied to the column of ones
    null_projector = np.identity(n_compounds) - pseudoinverse @ stacked
    return (null_projector @ ones_column).flat[:]

`_ppinv_algorithm()`

Matrix partial pseudoinverse algorithm for reaction balancing.

A reaction matrix pseudoinverse algorithm also proposed by Risteski. The method is based on solving a Diophantine matrix equation by means of the Moore-Penrose pseudoinverse matrix.

The algorithm can be described in steps:

1) Take the Moore-Penrose pseudoinverse of the reactant matrix.

2) Create a G matrix in the form of (I-AA^-)B, where I is the identity matrix, A is the reactant matrix, A^- is the MP pseudoinverse of A and B is the product matrix.

3) Then, the vector y (coefficients of products) is equal to (I-G^-G)u.

4) Vector x (coefficients of reactants) is equal to A^-By + (I-A^-A)v, where u and v are columns of ones.

Note

While this algorithm and _gpinv_algorithm are very similar, there are some differences in output results. This method exists mostly for legacy purposes, like balancing some reactions according to Risteski.

Returns:

Type	Description
`NDArray[float64]`	A 1D NumPy array of calculated coefficients

Source code in src/chemsynthcalc/balancing_algos.py

def _ppinv_algorithm(self) -> npt.NDArray[np.float64]:
    """
    Matrix partial pseudoinverse algorithm for reaction balancing.

    A reaction matrix pseudoinverse algorithm also proposed by
    [Risteski](https://www.koreascience.or.kr/article/JAKO200802727293429.page).
    The method solves a Diophantine matrix equation by means of
    the Moore-Penrose (MP) pseudoinverse matrix.

    Notation: A is the reactant matrix, B is the product matrix,
    A^- is the MP pseudoinverse of A and I is an identity matrix.
    The algorithm can be described in steps:

    1) Take the MP pseudoinverse A^- of the reactant matrix.

    2) Create the G matrix in the form of (I-AA^-)B.

    3) The vector y (coefficients of products) is equal to
    (I-G^-G)u, where u is a vector of ones.

    4) The vector x (coefficients of reactants) is A^-By plus a
    term taken from (I-A^-A). This implementation adds A^-By to
    (I-A^-A) and uses the first row of the result.

    Note:
        While this algorithm and
        [_gpinv_algorithm][chemsynthcalc.balancing_algos.BalancingAlgorithms._gpinv_algorithm]
        are very similar, there are some differences in output results.
        This method exists mostly for legacy purposes, like balancing
        some reactions according to [Risteski](https://www.koreascience.or.kr/article/JAKO200802727293429.page).

    Returns:
        A 1D NumPy array of calculated coefficients
    """
    reactants = self.reactant_matrix
    products = self.product_matrix
    reactants_pinv = scipy.linalg.pinv(reactants)

    # G = (I - AA^-)B
    complement = (
        np.identity(self.reaction_matrix.shape[0]) - reactants @ reactants_pinv
    )
    g_matrix = complement @ products

    # y = (I - G^-G)u, u being a vector of ones
    g_projector = scipy.linalg.pinv(g_matrix) @ g_matrix
    n_products = g_projector.shape[1]
    products_coefs = (np.identity(n_products) - g_projector).dot(
        np.ones(n_products)
    )

    # x: A^-By is added to (I - A^-A) and the first row is kept
    a_projector = reactants_pinv @ reactants
    x_rows = (
        np.identity(a_projector.shape[1]) - a_projector
    ) + reactants_pinv @ products @ products_coefs.T
    reactants_coefs = x_rows[0].T

    return np.squeeze(np.asarray(np.hstack((reactants_coefs, products_coefs))))

`_comb_algorithm(max_number_of_iterations=100000000.0)`

Matrix combinatorial algorithm for reaction balancing.

Finds a solution of a Diophantine matrix equation by simply enumerating all possible solutions of number_of_iterations coefficients. The solution space is created by Cartesian product (in this case, np.meshgrid function), and therefore it is very limited by memory. There must be a better, more clever and faster solution to this!

Important

Only for integer coefficients less than 128. Only for reactions with total compound count <=10. A GPU-accelerated version of this method can be done by importing CuPy and replacing np. with cp.

Note

All possible variations of coefficients vectors are combinations = max_coefficients**number_of_compounds, therefore this method is most effective for reaction with small numbers of compounds.

Returns:

Type	Description
`NDArray[int32] \| None`	A 1D NumPy array of calculated coefficients or None if no solution could be computed

Source code in src/chemsynthcalc/balancing_algos.py

def _comb_algorithm(
    self, max_number_of_iterations: float = 1e8
) -> npt.NDArray[np.int32] | None:
    """
    Matrix combinatorial algorithm for reaction balancing.

    Finds a solution solution of a Diophantine matrix equation
    by simply enumerating of all possible solutions of number_of_iterations
    coefficients. The solution space is created by Cartesian product
    (in this case, *np.meshgrid* function), and therefore it is very
    limited by memory. There must a better, clever and fast solution
    to this!

    Important:
        Only for integer coefficients less than 128. Only for reactions
        with total compound count <=10.
        A GPU-accelerated version of this method can be done by importing
        CuPy and replacing np. with cp.

    Note:
        All possible variations of coefficients vectors are
        combinations = max_coefficients**number_of_compounds,
        therefore this method is most effective for reaction with
        small numbers of compounds.

    Returns:
        A 1D NumPy array of calculated coefficients of None if can't compute
    """
    byte = 127
    number_of_compounds = self.reaction_matrix.shape[1]
    if number_of_compounds > 10:
        raise ValueError("Sorry, this method is only for n of compound <=10")

    number_of_iterations = int(
        max_number_of_iterations ** (1 / number_of_compounds)
    )

    if number_of_iterations > byte:
        number_of_iterations = byte

    trans_reaction_matrix = (self.reaction_matrix).T
    lenght = self.reactant_matrix.shape[1]
    old_reactants = trans_reaction_matrix[:lenght].astype("ushort")
    old_products = trans_reaction_matrix[lenght:].astype("ushort")
    for i in range(2, number_of_iterations + 2):
        cart_array = (np.arange(1, i, dtype="ubyte"),) * number_of_compounds
        permuted = np.array(np.meshgrid(*cart_array), dtype="ubyte").T.reshape(
            -1, number_of_compounds
        )
        filter = np.asarray([i - 1], dtype="ubyte")
        permuted = permuted[np.any(permuted == filter, axis=1)]
        # print("calculating max coef %s of %s" % (i-1, number_of_iterations), end='\r', flush=False)
        reactants_vectors = permuted[:, :lenght]
        products_vectors = permuted[:, lenght:]
        del permuted
        reactants = (old_reactants[None, :, :] * reactants_vectors[:, :, None]).sum(
            axis=1
        )
        products = (old_products[None, :, :] * products_vectors[:, :, None]).sum(
            axis=1
        )
        diff = np.subtract(reactants, products)
        del reactants
        del products
        where = np.where(~diff.any(axis=1))[0]
        if np.any(where):
            if where.shape[0] == 1:
                idx = where
            else:
                idx = where[0]
            # print("")
            return np.array(
                np.concatenate(
                    (
                        reactants_vectors[idx].flatten(),
                        products_vectors[idx].flatten(),
                    )
                )
            )
        gc.collect()
    # print("")
    return None

`chem_errors`

Module that contains custom errors for use in ChemSynthCalc