"""Single-stage pipeline that converts a MATLAB code base to Python.

source_dir: folder containing the MATLAB scripts (``*.m``).
target_dir: folder where the Python scripts are saved; it is created if it
    does not exist, and mirrors the folder structure of ``source_dir``.

The conversion is a line-oriented, best-effort translation.  It handles
comments, line continuation, ``function`` / ``if`` / ``elseif`` / ``else`` /
``for`` / ``while`` / ``end`` blocks, a few operators and the calls listed in
``FUNCTION_MAP``.  Anything else is copied through for manual review.
"""

import os
import re
from pathlib import Path

source_dir = "MATLAB_CODES"
target_dir = "PYTHON_CODES"

# MATLAB to Python mappings. NOT-EQUAL / exponential operators are handled
# separately in ``convert_assignment``.  Only the function *name* is mapped;
# call arguments are not rewritten, so results need review where the NumPy
# signature differs from MATLAB's (e.g. ``zeros(2, 3)``, ``size(A, 1)``).
FUNCTION_MAP = {
    "disp": "print",
    "length": "len",
    "zeros": "np.zeros",
    "ones": "np.ones",
    "size": "np.shape",
    "sqrt": "np.sqrt",
    "sin": "np.sin",
    "cos": "np.cos",
    "mean": "np.mean",
    "sum": "np.sum",
    "max": "np.max",
    "min": "np.min",
}

# One level of indentation in the generated Python code.
INDENT = "    "

# Block openers that are not translated but still need a matching ``end``.
_UNTRANSLATED_OPENERS = frozenset({"switch", "try", "parfor"})

# Placeholder that stands in for a string literal while a line is rewritten.
_STRING_TOKEN = re.compile(r"__MSTR(\d+)__")

# ``function [outs] = name(params)`` with every part but the name optional.
_FUNCTION_HEADER = re.compile(
    r"^\s*function\s+"
    r"(?:(?:\[(?P<multi>[^\]]*)\]|(?P<single>\w+))\s*=\s*)?"
    r"(?P<name>\w+)\s*(?:\((?P<params>.*?)\))?"
)


class MatlabToPythonConverter:
    """Convert MATLAB scripts to Python, one file at a time.

    ``report`` collects one ``"<matlab file> -> <python file>"`` entry per
    converted file.  ``current_return_vars`` holds the output variables of
    the most recently parsed ``function`` header.
    """

    def __init__(self):
        self.report = []
        self.current_return_vars = []
        self._block_stack = []

    # ------------------------------------------------------------------
    # Repository / file level
    # ------------------------------------------------------------------
    def convert_repository(self, source_dir, target_dir):
        """Convert every ``*.m`` file below ``source_dir`` into ``target_dir``.

        The folder structure of ``source_dir`` is reproduced in
        ``target_dir`` and each file gets a ``.py`` extension.  Calls
        ``convert_file`` per script and ``generate_report`` at the end.
        """
        Path(target_dir).mkdir(parents=True, exist_ok=True)
        for root, dirs, files in os.walk(source_dir):
            dirs.sort()  # deterministic traversal, hence a stable report
            for file_name in sorted(files):
                if not file_name.endswith(".m"):
                    continue
                src_file = os.path.join(root, file_name)
                # Swap only the final suffix: a plain ``str.replace`` would
                # also rewrite ".m" inside names such as "my.models".
                relative = Path(
                    os.path.relpath(src_file, source_dir)
                ).with_suffix(".py")
                target_file = os.path.join(target_dir, relative)
                Path(os.path.dirname(target_file)).mkdir(
                    parents=True, exist_ok=True
                )
                self.convert_file(src_file, target_file)
        self.generate_report(target_dir)

    def convert_file(self, matlab_file, python_file):
        """Convert one MATLAB script and record it in ``self.report``.

        Reads ``matlab_file`` as UTF-8, passes the text through
        ``convert_code`` and writes the result to ``python_file``.
        """
        with open(matlab_file, "r", encoding="utf-8") as f:
            code = f.read()
        converted = self.convert_code(code)
        with open(python_file, "w", encoding="utf-8") as f:
            f.write(converted)
        self.report.append(f"{matlab_file} -> {python_file}")

    def generate_report(self, target_dir):
        """Write the summary of converted files to ``conversion_report.txt``."""
        report_file = os.path.join(target_dir, "conversion_report.txt")
        # Explicit encoding: file names may contain non-ASCII characters.
        with open(report_file, "w", encoding="utf-8") as f:
            f.write("MATLAB TO PYTHON CONVERSION REPORT\n")
            f.write("=" * 50 + "\n\n")
            for item in self.report:
                f.write(item + "\n")

    # ------------------------------------------------------------------
    # Whole-source conversion
    # ------------------------------------------------------------------
    def convert_code(self, code):
        """Translate MATLAB source text and return the Python source text.

        The output always starts with ``import numpy as np``.  Lines joined
        by ``...`` are merged into one statement.  Comments and string
        literals are set aside before operators and function names are
        rewritten, so their text is never altered.  Block bodies are
        indented from the nesting of ``function`` / ``if`` / ``for`` /
        ``while`` ... ``end``; an empty body receives ``pass``.

        A ``return`` statement is emitted only for the ``end`` that closes a
        function (or at the next ``function`` header / end of file when the
        function has no closing ``end``).  As a consequence a ``function``
        header that appears while another function is the innermost open
        block is treated as a new function, not as a nested one.

        Limitations: ``switch``, ``try`` and ``parfor`` blocks are copied
        untranslated, one-line blocks such as ``if x, y = 1; end`` and
        ``%{ ... %}`` block comments are not recognised.
        """
        self._block_stack = []
        self.current_return_vars = []
        converted_lines = ["import numpy as np", ""]
        fragments, comments, literals = [], [], []

        for raw_line in code.splitlines():
            masked, comment = self._mask_line(raw_line, literals)
            head, dots, tail = masked.partition("...")
            fragments.append(head.strip())
            if dots and tail.strip():
                # MATLAB ignores everything after ``...``: keep it as comment.
                comments.append(" " + self._unmask(tail.strip(), literals))
            if comment is not None:
                comments.append(comment)
            if dots:
                continue  # statement continues on the next physical line
            converted_lines.extend(
                self._render_logical_line(fragments, comments, literals)
            )
            fragments, comments, literals = [], [], []

        if fragments:  # source ended in the middle of a continued statement
            converted_lines.extend(
                self._render_logical_line(fragments, comments, literals)
            )
        # Function files may omit the closing ``end``: close what is open.
        while self._block_stack:
            converted_lines.extend(self._close_block())
        return "\n".join(converted_lines)

    def _render_logical_line(self, fragments, comments, literals):
        """Return the output lines for one statement plus its comments."""
        statement = " ".join(part for part in fragments if part)
        statement = statement.rstrip("; \t")
        comment_text = " ".join("#" + text for text in comments)
        if not statement:
            if comment_text:
                return [self._indent() + comment_text]
            return [""]
        lines = [
            self._unmask(line, literals)
            for line in self._convert_statement(statement)
        ]
        if comment_text:
            if lines:
                lines[-1] += "  " + comment_text
            else:  # e.g. ``end % comment`` closing a non-empty block
                lines.append(self._indent() + comment_text)
        return lines

    def _convert_statement(self, statement):
        """Translate one statement and update the block stack.

        ``statement`` has no comment, no trailing semicolon and its string
        literals masked.  Returns the indented output lines (possibly none).
        """
        stack = self._block_stack
        lines = []
        keyword_match = re.match(r"[A-Za-z_]\w*", statement)
        keyword = keyword_match.group(0) if keyword_match else ""
        rest = statement[len(keyword):].strip()

        if keyword == "function":
            header = self.convert_function_definition(statement)
            if header != statement:
                if stack and stack[-1]["returns"] is not None:
                    # Previous function had no ``end``: close it first.
                    lines.extend(self._close_block())
                lines.append(self._indent() + header)
                self._mark_body()
                stack.append({
                    "returns": list(self.current_return_vars),
                    "has_body": False,
                })
                return lines

        if statement == "end":
            return self._close_block() if stack else []

        if keyword in ("else", "elseif") and stack:
            frame = stack[-1]
            if not frame["has_body"]:
                lines.append(self._indent() + "pass")
            frame["has_body"] = False
            outer_indent = INDENT * (len(stack) - 1)
            if keyword == "elseif":
                condition = self._convert_expression(rest)
                lines.append(f"{outer_indent}elif {condition}:")
            else:
                lines.append(outer_indent + "else:")
                if rest:  # ``else <statement>`` written on one line
                    lines.extend(self._convert_statement(rest))
            return lines

        if keyword in ("if", "while", "for"):
            converter = {
                "if": self.convert_if,
                "while": self.convert_while,
                "for": self.convert_for,
            }[keyword]
            header = converter(self._convert_expression(statement))
            lines.append(self._indent() + header)
            self._mark_body()
            stack.append({"returns": None, "has_body": False})
            return lines

        if statement == "return":
            # A bare MATLAB ``return`` still hands back the output variables.
            returns = next(
                (
                    frame["returns"]
                    for frame in reversed(stack)
                    if frame["returns"] is not None
                ),
                None,
            )
            if returns:
                statement = "return " + ", ".join(returns)
            lines.append(self._indent() + statement)
            self._mark_body()
            return lines

        lines.append(self._indent() + self._convert_expression(statement))
        self._mark_body()
        if keyword in _UNTRANSLATED_OPENERS:
            # Copied through unchanged, but its ``end`` must still match.
            stack.append({"returns": None, "has_body": False})
        return lines

    def _close_block(self):
        """Pop the innermost block; return its ``return`` / ``pass`` line."""
        frame = self._block_stack.pop()
        body_indent = INDENT * (len(self._block_stack) + 1)
        if frame["returns"] is not None:
            self.current_return_vars = frame["returns"]
            statement = self.convert_end("end").strip()
            if statement:
                return [body_indent + statement]
        if not frame["has_body"]:
            return [body_indent + "pass"]
        return []

    def _indent(self):
        """Return the indentation for the current nesting depth."""
        return INDENT * len(self._block_stack)

    def _mark_body(self):
        """Record that the innermost open block contains a statement."""
        if self._block_stack:
            self._block_stack[-1]["has_body"] = True

    def _convert_expression(self, text):
        """Apply the function-name and operator rewrites to ``text``."""
        text = self.convert_built_in_functions(text)
        text = self.convert_boolean_and(text)
        text = self.convert_boolean_or(text)
        return self.convert_assignment(text)

    # ------------------------------------------------------------------
    # Lexical helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _mask_line(line, literals):
        """Split ``line`` into masked code and its ``%`` comment.

        String literals are appended to ``literals`` and replaced by
        ``__MSTR<n>__`` tokens so later rewrites cannot touch their content.
        Returns ``(code, comment)``; ``comment`` is the text after ``%`` or
        ``None`` when the line has no comment.
        """
        pieces = []
        index = 0
        length = len(line)
        while index < length:
            char = line[index]
            if char == "%":
                return "".join(pieces), line[index + 1:]
            if char in "'\"":
                previous = line[index - 1] if index else ""
                # A quote directly after an operand is the transpose operator.
                is_transpose = (
                    char == "'"
                    and previous != ""
                    and (previous.isalnum() or previous in "_)]}'.")
                )
                if not is_transpose:
                    end = index + 1
                    while end < length:
                        if line[end] == char:
                            if end + 1 < length and line[end + 1] == char:
                                end += 2  # doubled quote = escaped quote
                                continue
                            break
                        end += 1
                    if end < length:
                        literals.append(line[index:end + 1])
                        pieces.append(f"__MSTR{len(literals) - 1}__")
                        index = end + 1
                        continue
            pieces.append(char)
            index += 1
        return "".join(pieces), None

    @staticmethod
    def _unmask(text, literals):
        """Replace ``__MSTR<n>__`` tokens by equivalent Python literals.

        MATLAB escapes a quote by doubling it and treats backslashes
        literally; ``repr`` yields a Python literal with the same content.
        """
        def restore(match):
            position = int(match.group(1))
            if position >= len(literals):
                return match.group(0)
            literal = literals[position]
            quote = literal[0]
            return repr(literal[1:-1].replace(quote * 2, quote))

        return _STRING_TOKEN.sub(restore, text)

    @staticmethod
    def _split_top_level(text, separator):
        """Split ``text`` on ``separator`` outside of any brackets."""
        parts = []
        current = []
        depth = 0
        for char in text:
            if char in "([{":
                depth += 1
            elif char in ")]}":
                depth = max(depth - 1, 0)
            if char == separator and depth == 0:
                parts.append("".join(current).strip())
                current = []
            else:
                current.append(char)
        parts.append("".join(current).strip())
        return parts

    # ------------------------------------------------------------------
    # Single-line conversions (public, usable on their own)
    # ------------------------------------------------------------------
    def convert_comments(self, line):
        """Turn a ``%`` comment into ``#`` and drop the trailing semicolon.

        Works for full-line and trailing comments.  A ``%`` inside a string
        literal (e.g. a ``'%d'`` format) is not treated as a comment.
        """
        literals = []
        code, comment = self._mask_line(line, literals)
        code = self._unmask(code.rstrip("; \t"), literals)
        if comment is None:
            return code
        if not code.strip():
            return "#" + comment
        return f"{code}  #{comment}"

    def convert_built_in_functions(self, line):
        """Rename calls to the MATLAB functions listed in ``FUNCTION_MAP``.

        Only names followed by ``(`` and not preceded by ``.`` or a word
        character are replaced, so a variable or field that merely shares a
        name with a function (``max = 5``, ``s.size``) is left alone.
        """
        for matlab_func, python_func in FUNCTION_MAP.items():
            pattern = rf"(?<![\w.]){re.escape(matlab_func)}(?=\s*\()"
            line = re.sub(pattern, python_func, line)
        return line

    def convert_if(self, line):
        """Convert ``if <condition>`` into ``if <condition>:``."""
        match = re.match(r"\s*if\b\s*(.*)", line)
        if match and match.group(1).strip():
            return f"if {match.group(1).strip()}:"
        return line

    def convert_boolean_and(self, line):
        """Replace the short-circuit ``&&`` operator with ``and``.

        The element-wise ``&`` operator is intentionally left untouched.
        Surrounding whitespace is normalised so ``a&&b`` becomes
        ``a and b`` rather than ``aandb``.
        """
        return re.sub(r"\s*&&\s*", " and ", line)

    def convert_boolean_or(self, line):
        """Replace ``||`` and ``|`` with ``or``.

        Surrounding whitespace is normalised so ``a||b`` becomes
        ``a or b``.  NOTE(review): ``|`` is element-wise in MATLAB, so ``or``
        is only equivalent for scalar operands.
        """
        return re.sub(r"\s*\|\|?\s*", " or ", line)

    def convert_line_continuation(self, line):
        """Replace MATLAB's ``...`` continuation with a trailing backslash.

        Text after ``...`` is ignored by MATLAB and dropped here, because a
        Python backslash continuation must end the line.  ``...`` inside a
        string literal or a comment is left untouched.
        """
        literals = []
        code, _ = self._mask_line(line, literals)
        head, dots, _ = code.partition("...")
        if not dots:
            return line
        return self._unmask(head.rstrip(), literals) + " \\"

    def convert_for(self, line):
        """Convert a MATLAB ``for`` header into a Python ``for`` header.

        ``for i = a:b`` becomes ``for i in range(a, b + 1):`` and
        ``for i = a:s:b`` becomes ``for i in range(a, b + 1, s):`` (``b - 1``
        when ``s`` is written with a leading minus), because MATLAB ranges
        include the end value.  Bounds may be arbitrary expressions and are
        assumed to be integer-valued.  ``for x = expr`` without a range
        becomes ``for x in expr:``.  Anything else is returned unchanged.
        """
        match = re.match(r"\s*for\s+(\w+)\s*=\s*(.+)$", line)
        if not match:
            return line
        var = match.group(1)
        spec = match.group(2).strip().rstrip(";").strip()
        parts = self._split_top_level(spec, ":")
        if not all(parts):
            return line
        if len(parts) == 1:
            return f"for {var} in {spec}:"
        if len(parts) == 2:
            start, stop = parts
            return f"for {var} in range({start}, {self._inclusive(stop, 1)}):"
        if len(parts) == 3:
            start, step, stop = parts
            delta = -1 if step.startswith("-") else 1
            stop = self._inclusive(stop, delta)
            return f"for {var} in range({start}, {stop}, {step}):"
        return line

    @staticmethod
    def _inclusive(stop, delta):
        """Return ``stop`` moved by ``delta`` so ``range`` includes it."""
        if re.fullmatch(r"[+-]?\d+", stop):
            return str(int(stop) + delta)
        return f"{stop} {'+' if delta > 0 else '-'} 1"

    def convert_while(self, line):
        """Convert ``while <condition>`` into ``while <condition>:``."""
        match = re.match(r"\s*while\b\s*(.*)", line)
        if match and match.group(1).strip():
            return f"while {match.group(1).strip()}:"
        return line

    def convert_end(self, line):
        """Turn a function's closing ``end`` into its ``return`` statement.

        For a line that is exactly ``end`` the result is
        ``return <current_return_vars>`` indented by one level, or an empty
        string when there is nothing to return.  Other lines, including
        array indexing such as ``x(end)``, are returned unchanged.
        ``convert_code`` calls this only for the ``end`` that closes a
        function and applies the real nesting indentation itself.
        """
        if line.strip() != "end":
            return line
        returns = getattr(self, "current_return_vars", None)
        if returns:
            return INDENT + "return " + ", ".join(returns)
        return ""

    def convert_assignment(self, line):
        """Replace the not-equal, power and element-wise operators.

        ``~=`` becomes ``!=``, ``^`` and ``.^`` become ``**``, ``.*``
        becomes ``*`` and ``./`` becomes ``/``.
        """
        line = line.replace("~=", "!=")
        # Element-wise forms first, otherwise ``.^`` would turn into ``.**``.
        line = line.replace(".^", "**")
        line = line.replace(".*", "*")
        line = line.replace("./", "/")
        return line.replace("^", "**")

    def convert_function_definition(self, line):
        """Convert a MATLAB ``function`` header into a Python ``def`` line.

        Supported forms: ``function y = f(x)``, ``function [a, b] = f(x)``
        and ``function f(x)``; the parameter list may be omitted.  The
        output variables are stored in ``self.current_return_vars`` so that
        the matching ``return`` statement can be generated later.  Lines
        that are not function headers are returned unchanged.
        """
        match = _FUNCTION_HEADER.match(line)
        if not match:
            return line
        if match.group("multi") is not None:
            # Output lists may be separated by commas or by blanks.
            names = re.split(r"[,\s]+", match.group("multi").strip())
            self.current_return_vars = [name for name in names if name]
        elif match.group("single") is not None:
            self.current_return_vars = [match.group("single")]
        else:
            self.current_return_vars = []
        params = (match.group("params") or "").strip()
        return f"def {match.group('name')}({params}):"


if __name__ == "__main__":
    converter = MatlabToPythonConverter()
    converter.convert_repository(source_dir, target_dir)