fix: working answer key sample

Udayraj123 · Jan 23, 2023 · 6558b86
1 parent ea75858
commit 6558b86
Show file tree

Hide file tree

Showing 14 changed files with 112 additions and 78 deletions.
diff --git a/main.py b/main.py
@@ -37,6 +37,7 @@
     help="Specify an output directory.",
 )
 
+# TODO: move auto_align from args into config
 argparser.add_argument(
     "-a",
     "--autoAlign",

diff --git a/samples/community/UmarFarootAPS/csv-ans-key-from-image/answer_key.jpg b/samples/community/UmarFarootAPS/csv-ans-key-from-image/answer_key.jpg
diff --git a/samples/community/UmarFarootAPS/csv-ans-key-from-image/scans/Scan1.jpg b/samples/community/UmarFarootAPS/csv-ans-key-from-image/scans/Scan1.jpg
diff --git a/samples/community/UmarFarootAPS/csv-ans-key-from-image/scans/Scan7.jpg b/samples/community/UmarFarootAPS/csv-ans-key-from-image/scans/Scan2.jpg
diff --git a/samples/community/UmarFarootAPS/csv-ans-key-from-image/scans/Scan3.jpg b/samples/community/UmarFarootAPS/csv-ans-key-from-image/scans/answer_key.jpg
diff --git a/src/core.py b/src/core.py
@@ -17,19 +17,17 @@ class ImageInstanceOps:
 
     save_img_list: Any = defaultdict(list)
 
-    def __init__(self, tuning_config):
+    def __init__(self, tuning_config, args):
         super().__init__()
         self.tuning_config = tuning_config
+        self.args = args
         self.save_image_level = tuning_config.outputs.save_image_level
 
-    def reset_save_img(self, key):
-        self.save_img_list[key] = []
-
     def append_save_img(self, key, img):
         if self.save_image_level >= int(key):
             self.save_img_list[key].append(img.copy())
 
-    def save_or_show_stacks(self, key, filename, save_dir=None, pause=1):
+    def save_image_stacks(self, key, filename, save_dir):
         config = self.tuning_config
         if self.save_image_level >= int(key) and self.save_img_list[key] != []:
             name = os.path.splitext(filename)[0]
@@ -48,14 +46,7 @@ def save_or_show_stacks(self, key, filename, save_dir=None, pause=1):
                     int(config.dimensions.display_width * 2.5),
                 ),
             )
-            if save_dir is not None:
-                ImageUtils.save_img(
-                    f"{save_dir}stack/{name}_{str(key)}_stack.jpg", result
-                )
-            else:
-                InteractionUtils.show(
-                    f"{name}_{str(key)}", result, pause, 0, config=config
-                )
+            ImageUtils.save_img(f"{save_dir}stack/{name}_{str(key)}_stack.jpg", result)
 
     def put_label(self, img, label, size):
         config = self.tuning_config
@@ -66,6 +57,24 @@ def put_label(self, img, label, size):
         img[(pos[1] - size * 30) : (pos[1] + size * 2), :] = bg_val
         cv2.putText(img, label, pos, cv2.FONT_HERSHEY_SIMPLEX, size, clr, 3)
 
+    def reset_all_save_img(self):
+        for i in range(self.save_image_level):
+            self.save_img_list[i + 1] = []
+
+    def apply_preprocessors(self, file_path, in_omr, template):
+        tuning_config = self.tuning_config
+        # resize to conform to template
+        in_omr = ImageUtils.resize_util(
+            in_omr,
+            tuning_config.dimensions.processing_width,
+            tuning_config.dimensions.processing_height,
+        )
+
+        # run pre_processors in sequence
+        for pre_processor in template.pre_processors:
+            in_omr = pre_processor.apply_filter(in_omr, file_path)
+        return in_omr
+
     @staticmethod
     def draw_template_layout(img, template, shifted=True, draw_qvals=False, border=-1):
         img = ImageUtils.resize_util(
@@ -332,8 +341,9 @@ def get_local_threshold(
                 plt.show()
         return thr1
 
-    def read_omr_response(self, template, image, name, save_dir=None, auto_align=False):
+    def read_omr_response(self, template, image, name, save_dir=None):
         config = self.tuning_config
+        auto_align = self.args["autoAlign"]
         try:
             img = image.copy()
             # origDim = img.shape[:2]
@@ -539,7 +549,7 @@ def read_omr_response(self, template, image, name, save_dir=None, auto_align=Fal
             global_thr, _, _ = self.get_global_threshold(all_q_vals, looseness=4)
 
             logger.info(
-                f"Thresholding:\t\t global_thr: {round(global_thr, 2)} \tglobal_std_THR: {round(global_std_thresh, 2)}\t{'(Looks like a Xeroxed OMR)' if (global_thr == 255) else ''}"
+                f"Thresholding:\tglobal_thr: {round(global_thr, 2)} \tglobal_std_THR: {round(global_std_thresh, 2)}\t{'(Looks like a Xeroxed OMR)' if (global_thr == 255) else ''}"
             )
             # plt.show()
             # hist = getPlotImg()
@@ -720,8 +730,9 @@ def read_omr_response(self, template, image, name, save_dir=None, auto_align=Fal
 
             self.append_save_img(2, final_marked)
 
-            for i in range(config.outputs.save_image_level):
-                self.save_or_show_stacks(i + 1, name, save_dir)
+            if save_dir is not None:
+                for i in range(config.outputs.save_image_level):
+                    self.save_image_stacks(i + 1, name, save_dir)
 
             return omr_response, final_marked, multi_marked, multi_roll
 

diff --git a/src/defaults/config.py b/src/defaults/config.py
@@ -24,7 +24,7 @@
         },
         "outputs": {
             "show_image_level": 0,
-            "save_image_level": 2,
+            "save_image_level": 0,
             "save_detections": True,
         },
     },

diff --git a/src/entry.py b/src/entry.py
@@ -54,9 +54,10 @@ def process_dir(
 
     # Update local template (in current recursion stack)
     local_template_path = curr_dir.joinpath(constants.TEMPLATE_FILENAME)
-    if os.path.exists(local_template_path):
+    local_template_exists = os.path.exists(local_template_path)
+    if local_template_exists:
         # TODO: consider moving template inside image_instance_ops as an attribute
-        image_instance_ops = ImageInstanceOps(tuning_config)
+        image_instance_ops = ImageInstanceOps(tuning_config, args)
         template = Template(
             local_template_path,
             image_instance_ops,
@@ -65,7 +66,13 @@ def process_dir(
 
     local_evaluation_path = curr_dir.joinpath(constants.EVALUATION_FILENAME)
     if os.path.exists(local_evaluation_path):
-        evaluation_config = EvaluationConfig(local_evaluation_path, template, curr_dir)
+        if not local_template_exists:
+            logger.warning(
+                f"Found an evaluation file without a parent template file: {local_evaluation_path}"
+            )
+        evaluation_config = EvaluationConfig(
+            local_evaluation_path, template, image_instance_ops, curr_dir
+        )
 
     # Look for subdirectories for processing
     subdirs = [d for d in curr_dir.iterdir() if d.is_dir()]
@@ -97,9 +104,6 @@ def process_dir(
             )
             return
 
-        args_local = args.copy()
-        if "OverrideFlags" in template.options:
-            args_local.update(template.options["OverrideFlags"])
         logger.info(
             "------------------------------------------------------------------"
         )
@@ -108,7 +112,7 @@ def process_dir(
         logger.info(
             f"\t{'Cropping Enabled':<22}: {str('CropOnMarkers' in template.pre_processors)}"
         )
-        logger.info(f"\t{'Auto Alignment':<22}: {str(args_local['autoAlign'])}")
+        logger.info(f"\t{'Auto Alignment':<22}: {str(args['autoAlign'])}")
         logger.info(f"\t{'Using Template':<22}: { str(template)}")
         logger.info(
             f"\t{'Using pre-processors':<22}: {[pp.__class__.__name__ for pp in template.pre_processors]}"
@@ -123,9 +127,9 @@ def process_dir(
             template,
             tuning_config,
             evaluation_config,
-            args_local,
             outputs_namespace,
             image_instance_ops,
+            set_layout_flag=args["setLayout"],
         )
 
     elif not subdirs:
@@ -153,41 +157,30 @@ def process_files(
     template,
     tuning_config,
     evaluation_config,
-    args,
     outputs_namespace,
     image_instance_ops,
+    set_layout_flag=False,
 ):
     start_time = int(time())
     files_counter = 0
     STATS.files_not_moved = 0
 
     for file_path in omr_files:
         files_counter += 1
-
         file_name = file_path.name
-        args["current_file"] = file_path
 
         in_omr = cv2.imread(str(file_path), cv2.IMREAD_GRAYSCALE)
+
         logger.info("")
         logger.info(
             f"({files_counter}) Opening image: \t'{file_path}'\tResolution: {in_omr.shape}"
         )
 
-        for i in range(image_instance_ops.save_image_level):
-            image_instance_ops.reset_save_img(i + 1)
+        image_instance_ops.reset_all_save_img()
 
         image_instance_ops.append_save_img(1, in_omr)
 
-        # resize to conform to template
-        in_omr = ImageUtils.resize_util(
-            in_omr,
-            tuning_config.dimensions.processing_width,
-            tuning_config.dimensions.processing_height,
-        )
-
-        # run pre_processors in sequence
-        for pre_processor in template.pre_processors:
-            in_omr = pre_processor.apply_filter(in_omr, args)
+        in_omr = image_instance_ops.apply_preprocessors(file_path, in_omr, template)
 
         if in_omr is None:
             # Error OMR case
@@ -213,7 +206,7 @@ def process_files(
                 )
             continue
 
-        if args["setLayout"]:
+        if set_layout_flag:
             template_layout = image_instance_ops.draw_template_layout(
                 in_omr, template, shifted=False, border=2
             )
@@ -231,11 +224,7 @@ def process_files(
             multi_marked,
             _,
         ) = image_instance_ops.read_omr_response(
-            template,
-            image=in_omr,
-            name=file_id,
-            save_dir=save_dir,
-            auto_align=args["autoAlign"],
+            template, image=in_omr, name=file_id, save_dir=save_dir
         )
 
         # concatenate roll nos, set unmarked responses, etc

diff --git a/src/evaluation.py b/src/evaluation.py
@@ -4,6 +4,7 @@
 from copy import deepcopy
 from fractions import Fraction
 
+import cv2
 import pandas as pd
 from rich.table import Table
 
@@ -14,7 +15,7 @@
     MARKING_VERDICT_TYPES,
     QUESTION_STRING_REGEX_GROUPS,
 )
-from src.utils.parsing import open_evaluation_with_validation
+from src.utils.parsing import get_concatenated_response, open_evaluation_with_validation
 
 
 def parse_float_or_fraction(result):
@@ -267,7 +268,7 @@ def update_streaks_for_verdict(self, question_verdict):
 class EvaluationConfig:
     """Note: this instance will be reused for multiple omr sheets"""
 
-    def __init__(self, local_evaluation_path, template, curr_dir):
+    def __init__(self, local_evaluation_path, template, image_instance_ops, curr_dir):
         evaluation_json = open_evaluation_with_validation(local_evaluation_path)
         options, marking_scheme, source_type = map(
             evaluation_json.get, ["options", "marking_scheme", "source_type"]
@@ -285,34 +286,67 @@ def __init__(self, local_evaluation_path, template, curr_dir):
                 logger.warning(f"Note: Answer key csv does not exist at: '{csv_path}'.")
 
                 answer_key_image_path = options.get("answer_key_image_path", None)
-                image_path = curr_dir.joinpath(answer_key_image_path)
                 if not answer_key_image_path:
                     raise Exception(f"Answer key csv not found at '{csv_path}'")
+
+                image_path = str(curr_dir.joinpath(answer_key_image_path))
                 if not os.path.exists(image_path):
                     raise Exception(f"Answer key image not found at '{image_path}'")
+                self.exclude_files.append(image_path)
 
-                # TODO: trigger parent's omr reading for 'image_path' with evaluation_columns (only for regenerate)
-                # TODO: think about upcoming plugins as we'd be going out of the execution flow
                 logger.debug(f"Attempting to generate csv from image: '{image_path}'")
+                # TODO: use a common function for below changes?
+                in_omr = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+                in_omr = image_instance_ops.apply_preprocessors(
+                    image_path, in_omr, template
+                )
+                if in_omr is None:
+                    raise Exception(
+                        f"Could not read answer key from image {image_path}"
+                    )
+                (
+                    response_dict,
+                    _final_marked,
+                    _multi_marked,
+                    _multi_roll,
+                ) = image_instance_ops.read_omr_response(
+                    template,
+                    image=in_omr,
+                    name=image_path,
+                    save_dir=None,
+                )
+                omr_response = get_concatenated_response(response_dict, template)
 
-                self.exclude_files.append(image_path)
-
-            # TODO: CSV parsing/validation for each row with a (qNo, <ans string/>) pair
-            answer_key = pd.read_csv(
-                csv_path,
-                header=None,
-                names=["question", "answer"],
-                converters={"question": str, "answer": self.parse_answer_column},
-            )
+                # pickup non-empty questions
+                empty_val = template.global_empty_val
+                empty_answer_regex = (
+                    rf"{re.escape(empty_val)}+" if empty_val != "" else r"^$"
+                )
+                self.questions_in_order = sorted(
+                    question
+                    for (question, answer) in omr_response.items()
+                    if not re.search(empty_answer_regex, answer)
+                )
+                answers_in_order = [
+                    omr_response[question] for question in self.questions_in_order
+                ]
+            else:
+                # TODO: CSV parsing/validation for each row with a (qNo, <ans string/>) pair
+                answer_key = pd.read_csv(
+                    csv_path,
+                    header=None,
+                    names=["question", "answer"],
+                    converters={"question": str, "answer": self.parse_answer_column},
+                )
 
-            self.questions_in_order = answer_key["question"].to_list()
-            answers_in_order = answer_key["answer"].to_list()
-            self.validate_questions(answers_in_order)
+                self.questions_in_order = answer_key["question"].to_list()
+                answers_in_order = answer_key["answer"].to_list()
         else:
             self.questions_in_order = self.parse_questions_in_order(
                 options["questions_in_order"]
             )
             answers_in_order = options["answers_in_order"]
+
         self.validate_questions(answers_in_order)
 
         self.marking_scheme, self.question_to_scheme = {}, {}
@@ -323,7 +357,7 @@ def __init__(self, local_evaluation_path, template, curr_dir):
             if section_key != DEFAULT_SECTION_KEY:
                 self.marking_scheme[section_key] = section_marking_scheme
                 for q in section_marking_scheme.questions:
-                    # check the answer key for custom scheme here?
+                    # TODO: check the answer key for custom scheme here?
                     self.question_to_scheme[q] = section_marking_scheme
                 self.has_non_default_section = True
             else:
@@ -468,6 +502,7 @@ def prepare_explanation_table(self):
         table.add_column("Verdict")
         table.add_column("Delta")
         table.add_column("Score")
+        # TODO: Add max and min score in explanation (row-wise and total)
         if self.has_non_default_section:
             table.add_column("Section")
         if self.has_streak_scheme: