#!/usr/bin/env python3
"""
Regression Test Generator for Genesis System.

This script automatically generates regression tests from successful task completions
logged in a designated directory. It parses the logs, extracts relevant information
(task name, input parameters, expected output), and creates corresponding test cases
in a specified output file.

The generated tests can then be integrated into the Genesis System's testing framework
to ensure that changes to the system do not break existing functionality.
"""

import argparse
import json
import logging
import os
import re
from typing import Any, Dict, List, Optional, Tuple

# Root logger setup: emit INFO and above as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)


class RegressionTestGenerator:
    """
    A class to generate regression tests from task completion logs.
    """

    def __init__(self, log_dir: str, output_file: str, task_pattern: str = r"task_(.*?)_"):
        """
        Initializes the RegressionTestGenerator.

        Args:
            log_dir: The directory containing task completion logs.
            output_file: The file to write the generated tests to.
            task_pattern: Regular expression pattern to extract the task name
                           from the log file name.  Defaults to `task_(.*?)_`
        """
        self.log_dir = log_dir
        self.output_file = output_file
        self.task_pattern = task_pattern
        self.tests: List[Dict[str, Any]] = []

    def parse_logs(self) -> None:
        """
        Parses the task completion logs and extracts relevant information.
        """
        for filename in os.listdir(self.log_dir):
            if filename.endswith(".log"):
                filepath = os.path.join(self.log_dir, filename)
                try:
                    with open(filepath, "r") as f:
                        log_content = f.read()

                    task_name = self._extract_task_name(filename)
                    if not task_name:
                        logging.warning(f"Could not extract task name from {filename}. Skipping.")
                        continue

                    input_data, output_data = self._extract_input_output(log_content)

                    if input_data and output_data:
                        self.tests.append(
                            {
                                "task_name": task_name,
                                "input": input_data,
                                "expected_output": output_data,
                            }
                        )
                    else:
                        logging.warning(f"Could not extract input/output from {filename}. Skipping.")

                except Exception as e:
                    logging.error(f"Error processing {filename}: {e}")

    def _extract_task_name(self, filename: str) -> str:
        """
        Extracts the task name from the log filename using the configured pattern.

        Args:
            filename: The name of the log file.

        Returns:
            The extracted task name, or None if not found.
        """
        match = re.search(self.task_pattern, filename)
        if match:
            return match.group(1)
        return None

    def _extract_input_output(self, log_content: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        """
        Extracts input and output data from the log content.

        This implementation assumes that the input and output are clearly marked
        within the logs using JSON formatting.  The exact format of the log
        should be adjusted as needed to match Genesis conventions.

        Args:
            log_content: The content of the log file.

        Returns:
            A tuple containing the input data and the expected output data.  Returns
            (None, None) if extraction fails.
        """
        input_data = None
        output_data = None
        try:
            input_match = re.search(r"INPUT_START(.*?)INPUT_END", log_content, re.DOTALL)
            output_match = re.search(r"OUTPUT_START(.*?)OUTPUT_END", log_content, re.DOTALL)

            if input_match:
                input_json = input_match.group(1).strip()
                try:
                    input_data = json.loads(input_json)
                except json.JSONDecodeError as e:
                    logging.warning(f"Failed to decode input JSON: {e}")
                    input_data = None

            if output_match:
                output_json = output_match.group(1).strip()
                try:
                    output_data = json.loads(output_json)
                except json.JSONDecodeError as e:
                    logging.warning(f"Failed to decode output JSON: {e}")
                    output_data = None
        except Exception as e:
            logging.error(f"Error extracting input/output: {e}")

        return input_data, output_data

    def generate_test_file(self) -> None:
        """
        Generates the test file with the extracted test cases.

        This function writes the tests to the specified output file in a Python format.
        You may need to adapt this part to fit the exact test framework used in Genesis.
        """
        try:
            with open(self.output_file, "w") as f:
                f.write("#!/usr/bin/env python3\n")
                f.write("# Generated regression tests\n")
                f.write("# Do not edit manually!\n\n")
                f.write("import unittest\n\n")
                f.write("class RegressionTests(unittest.TestCase):\n")

                if not self.tests:
                    f.write("    def test_empty(self):\n")
                    f.write("        self.assertTrue(True)  # Placeholder when no tests are generated\n\n")
                    return


                for i, test in enumerate(self.tests):
                    task_name = test["task_name"]
                    input_data = test["input"]
                    expected_output = test["expected_output"]

                    test_name = f"    def test_{task_name}_{i}(self):\n"
                    f.write(test_name)
                    f.write(f"        input_data = {repr(input_data)}\n")
                    f.write(f"        expected_output = {repr(expected_output)}\n")
                    f.write(f"        # TODO: Replace 'run_task' with the actual function to execute the task\n")
                    f.write(f"        actual_output = run_task(input_data)  # Replace with actual task execution\n")
                    f.write(f"        self.assertEqual(actual_output, expected_output)\n\n")

                f.write("def run_task(input_data):\n")
                f.write("    # TODO: Implement the task execution logic here. This is a placeholder.\n")
                f.write("    return {'status': 'success'}\n\n")

                f.write("if __name__ == '__main__':\n")
                f.write("    unittest.main()\n")

            logging.info(f"Successfully generated test file: {self.output_file}")
        except Exception as e:
            logging.error(f"Error generating test file: {e}")

def main():
    """
    Command-line entry point.

    Reads ``--log_dir``, ``--output_file`` and the optional ``--task_pattern``
    from the command line, then parses the logs and writes the test file.
    """
    cli = argparse.ArgumentParser(
        description="Generate regression tests from task completion logs."
    )

    # Each entry: (flag, keyword arguments forwarded to add_argument).
    argument_specs = (
        (
            "--log_dir",
            {
                "type": str,
                "required": True,
                "help": "The directory containing task completion logs.",
            },
        ),
        (
            "--output_file",
            {
                "type": str,
                "required": True,
                "help": "The file to write the generated tests to.",
            },
        ),
        (
            "--task_pattern",
            {
                "type": str,
                "default": r"task_(.*?)_",
                "help": "Regular expression to extract task names from log filenames",
            },
        ),
    )
    for flag, spec in argument_specs:
        cli.add_argument(flag, **spec)

    options = cli.parse_args()

    test_generator = RegressionTestGenerator(
        options.log_dir,
        options.output_file,
        options.task_pattern,
    )
    test_generator.parse_logs()
    test_generator.generate_test_file()


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()