From c01b90a60aff8ba46338d1b0214e158fbba5b4b5 Mon Sep 17 00:00:00 2001 From: Spyros Date: Sat, 3 Aug 2024 16:51:28 +0100 Subject: [PATCH] import from export format --- application/defs/cre_defs.py | 14 +- application/tests/spreadsheet_parsers_test.py | 29 +- application/tests/utils/data_gen.py | 478 ++++++++---------- application/utils/spreadsheet_parsers.py | 69 ++- 4 files changed, 289 insertions(+), 301 deletions(-) diff --git a/application/defs/cre_defs.py b/application/defs/cre_defs.py index 0e9accb0..e8b33d5b 100644 --- a/application/defs/cre_defs.py +++ b/application/defs/cre_defs.py @@ -7,9 +7,7 @@ from application.defs import cre_exceptions -class ExportFormat( - Enum -): # TODO: this can likely be replaced with a method that iterates over an object's vars and formats headers to +class ExportFormat: # TODO: this can likely be replaced with a method that iterates over an object's vars and formats headers to # :: separator = "|" section = "name" @@ -24,14 +22,8 @@ class ExportFormat( tooltype = "ToolType" # sectionID = "SectionID" - @classmethod - def attributes(): - return [ - "name", - "hyperlink", - "description", - "id", - ] + def __str__(self): + return str(self.value) @staticmethod def get_doctype(header: str) -> Optional["Credoctypes"]: diff --git a/application/tests/spreadsheet_parsers_test.py b/application/tests/spreadsheet_parsers_test.py index 324fa481..3b5e2061 100644 --- a/application/tests/spreadsheet_parsers_test.py +++ b/application/tests/spreadsheet_parsers_test.py @@ -1,3 +1,5 @@ +import json +from pprint import pprint import unittest from application.tests.utils import data_gen from application.defs import cre_defs as defs @@ -8,30 +10,15 @@ class TestParsers(unittest.TestCase): + def test_parse_export_format(self) -> None: - """Given - * CRE "C1" -> Standard "S1" section "SE1" - * CRE "C2" -> CRE "C3" linktype contains - * CRE "C3" -> "C2" (linktype is part of), Standard "S3" section "SE3" - * CRE "C5" -> Standard "S1" section "SE1" subsection "SBE1" - * CRE "C5" -> Standard "S1" section "SE1" subsection "SBE11" - * CRE "C6" -> Standard "S1" section "SE11", Standard "S2" section "SE22", CRE "C7"(linktype contains) , CRE "C8" (linktype contains) - * Standard "SL" - * Standard "SL2" -> Standard "SLL" - # * CRE "C9" - Expect: - 9 CRES - 9 standards - appropriate links among them based on the arrows above - """ + input_data, expected = data_gen.export_format_data() - result = parse_export_format(input_data) + cres, standards = parse_export_format(input_data) self.maxDiff = None - for key, val in result.items(): - # self.assertDictEqual(expected[key].todict(), val.todict()) - expected[key].links = [] - val.links = [] - self.assertDictEqual(val.todict(), expected[key].todict()) + + self.assertListEqual(list(cres), list(expected[defs.Credoctypes.CRE])) + self.assertListEqual(list(expected[defs.Credoctypes.Standard]), list(standards)) def test_parse_hierarchical_export_format(self) -> None: # TODO(northdpole): add a tags linking test diff --git a/application/tests/utils/data_gen.py b/application/tests/utils/data_gen.py index c4222c1c..172d95db 100644 --- a/application/tests/utils/data_gen.py +++ b/application/tests/utils/data_gen.py @@ -516,290 +516,252 @@ def root_csv_minimum_data(): def export_format_data(): input_data = [ { - "CRE 0": "111-111|C1", - "S1:hyperlink": "https://example.com/S1", - "S1:name": "SE1", - "S1:id": "id1", - "SL:hyperlink": "", - "SL:name": "", - "SL:id": "", - "SL2:hyperlink": "", - "SL2:name": "", - "SL2:id": "", - "SLL:hyperlink": "", - "SLL:name": "", - "SLL:id": "", + "CRE 0": "000-001|C0", + "CRE 1": "", + "CRE 2": "", + "CRE 3": "", + "CRE 4": "", + "CRE 5": "", + "S1|hyperlink": "https://example.com/S1", + "S1|name": "SE1", + "S1|id": "id1", + "S1|description": "SE1 description", + "S2|hyperlink": "https://example.com/S1", + "S2|name": "", + "S2|id": "", + "S2|description": "", + "S3|hyperlink": "", + "S3|name": "", + "S3|id": "", + "S3|description": "", + "S4|hyperlink": "", + "S4|name": "", + "S4|id": "", + "S4|description": "", + "S5|hyperlink": "", + "S5|name": "", + "S5|id": "", + "S5|description": "", + "SL|hyperlink": "", + "SL|name": "", + "SL|id": "", + "SL2|hyperlink": "", + "SL2|name": "", + "SL2|id": "", + "SLL|hyperlink": "", + "SLL|name": "", + "SLL|id": "", }, { "CRE 1": "222-222|C2", }, { "CRE 3": "333-333|C3", - "S3:hyperlink": "https://example.com/S3", - "S3:description": "SE3", - "S3:name": "SE3 section", + "S3|hyperlink": "https://example.com/S3", + "S3|description": "SE3", + "S3|name": "SE3", + "S3|id": "5.3", }, { - "CRE 4": "555-555|C5", - "S1:hyperlink": "https://example.com/S1", - "S1:name": "SE1", + "CRE 4": "444-444|C4", + "S1|hyperlink": "https://example.com/S1", + "S1|name": "SE1", + "S1|id": "id1", }, { - "CRE:description": "C6 description", - "CRE:id": "666-666", - "CRE:name": "C6", - "Standard:S1:hyperlink": "https://example.com/S1", - "Standard:S1:link_type": "Linked To", - "Standard:S1:section": "SE1", - "Standard:S1:subsection": "SBE11", - "Tool:S2:hyperlink": "https://example.com/S2", - "Tool:S2:link_type": "Linked To", - "Tool:S2:description": "SE2", - "Tool:S2:ToolType": "Offensive", - "Tool:S2:SectionID": "0", - "Tool:S2:section": "rule-0", - "Code:S3:hyperlink": "", - "Code:S3:link_type": "", - "Code:S3:description": "", - "Linked_CRE_0:id": "777-777", - "Linked_CRE_0:link_type": "Contains", - "Linked_CRE_0:name": "C7", - "Linked_CRE_1:id": "888-888", - "Linked_CRE_1:link_type": "Contains", - "Linked_CRE_1:name": "C8", - "SL:hyperlink": "", - "SL:link_type": "", - "SL:section": "", - "SL:subsection": "", - "SL2:hyperlink": "", - "SL2:link_type": "", - "SL2:section": "", - "SL2:subsection": "", - "SLL:hyperlink": "", - "SLL:link_type": "", - "SLL:section": "", - "SLL:subsection": "", + "CRE 5": "555-555|C5", + "S1|hyperlink": "https://example.com/S1", + "S1|name": "SE1", + "S1|id": "id1", }, { - "CRE:description": "", - "CRE:id": "", - "CRE:name": "", - "Standard:S1:hyperlink": "", - "Standard:S1:link_type": "", - "Standard:S1:section": "", - "Standard:S1:subsection": "", - "S2:hyperlink": "", - "S2:link_type": "", - "S2:section": "", - "S2:subsection": "", - "Code:S3:hyperlink": "", - "Code:S3:link_type": "", - "Code:S3:description": "", - "Linked_CRE_0:id": "", - "Linked_CRE_0:link_type": "", - "Linked_CRE_0:name": "", - "Linked_CRE_1:id": "", - "Linked_CRE_1:link_type": "", - "Linked_CRE_1:name": "", - "SL:hyperlink": "https://example.com/SL", - "SL:link_type": "", - "SL:section": "SSL", - "SL:subsection": "SBESL", - "SL2:hyperlink": "", - "SL2:link_type": "", - "SL2:section": "", - "SL2:subsection": "", - "SLL:hyperlink": "", - "SLL:link_type": "", - "SLL:section": "", - "SLL:subsection": "", + "CRE 0": "666-666|C6", }, { - "CRE:description": "", - "CRE:id": "", - "CRE:name": "", - "Standard:S1:hyperlink": "", - "Standard:S1:link_type": "", - "Standard:S1:section": "", - "Standard:S1:subsection": "", - "S2:hyperlink": "", - "S2:link_type": "", - "S2:section": "", - "S2:subsection": "", - "Code:S3:hyperlink": "", - "Code:S3:link_type": "", - "Code:S3:description": "", - "Linked_CRE_0:id": "", - "Linked_CRE_0:link_type": "", - "Linked_CRE_0:name": "", - "Linked_CRE_1:id": "", - "Linked_CRE_1:link_type": "", - "Linked_CRE_1:name": "", - "SL:hyperlink": "", - "SL:link_type": "", - "SL:section": "", - "SL:subsection": "SESL", - "SL2:hyperlink": "https://example.com/SL2", - "SL2:link_type": "", - "SL2:section": "SSL2", - "SL2:subsection": "SBESL2", - "SLL:hyperlink": "https://example.com/SLL", - "SLL:link_type": "SAM", - "SLL:section": "SSLL", - "SLL:subsection": "SBESLL", + "CRE 0": "777-777|C7", + }, + { + "CRE 0": "888-888|C8", + }, + { + "S1|hyperlink": "", + "S1|name": "", + "S2|hyperlink": "", + "S2|name": "", + "SL|hyperlink": "https://example.com/SL", + "SL|name": "SSL", + "SL|id": "slid", + "SL2|hyperlink": "", + "SL2|name": "", + "SLL|hyperlink": "", + "SLL|name": "", + }, + { + "SL2|hyperlink": "https://example.com/SL2", + "SL2|id": "sl2id", + "SL2|name": "SSL2", + "SLL|hyperlink": "https://example.com/SLL", + "SLL|name": "SSLL", + "SLL|id": "SBESLL", + "SLL|hyperlink": "", }, ] expected = { - "C1": defs.CRE( - id="111-111", - description="C1 description", - name="C1", - links=[ - defs.Link( - ltype=defs.LinkTypes.LinkedTo, - document=defs.Standard( - name="S1", - section="SE1", - subsection="SBE1", - hyperlink="https://example.com/S1", + defs.Credoctypes.CRE.value: [ + defs.CRE( + id="000-001", + name="C0", + links=[ + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=defs.Standard( + name="S1", + section="SE1", + sectionID="id1", + hyperlink="https://example.com/S1", + ), + ), + defs.Link( + ltype=defs.LinkTypes.Contains, + document=defs.CRE( + id="222-222", + name="C2", + ), + ), + ], + ), + defs.CRE( + id="222-222", + name="C2", + links=[ + defs.Link( + ltype=defs.LinkTypes.Contains, + document=defs.CRE(id="333-333", name="C3"), + ) + ], + ), + defs.CRE( + id="333-333", + name="C3", + links=[ + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=defs.Standard( + name="S3", + section="SE3", + sectionID="5.3", + hyperlink="https://example.com/S3", + description="SE3", + ), ), - ) - ], - ), - "C2": defs.CRE( - id="222-222", - description="C2 description", - name="C2", - links=[ - defs.Link( - ltype=defs.LinkTypes.Contains, - document=defs.CRE(id="333-333", name="C3"), - ) - ], - ), - "C3": defs.CRE( - id="333-333", - description="C3 description", - name="C3", - links=[ - defs.Link( - ltype=defs.LinkTypes.PartOf, - document=defs.CRE( - id="222-222", description="C2 description", name="C2" + defs.Link( + ltype=defs.LinkTypes.Contains, + document=defs.CRE(id="444-444", name="C4"), ), - ), - defs.Link( - ltype=defs.LinkTypes.LinkedTo, - document=defs.Code( - name="S3", - description="SE3", - hyperlink="https://example.com/S3", + ], + ), + defs.CRE( + id="444-444", + name="C4", + links=[ + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=defs.Standard( + name="S1", + section="SE1", + sectionID="id1", + hyperlink="https://example.com/S1", + ), ), - ), - ], - ), - "C5": defs.CRE( - id="555-555", - description="C5 description", - name="C5", - links=[ - defs.Link( - ltype=defs.LinkTypes.LinkedTo, - document=defs.Standard( - name="S1", - section="SE1", - subsection="SBE1", - hyperlink="https://example.com/S1", + defs.Link( + ltype=defs.LinkTypes.Contains, + document=defs.CRE(id="555-555", name="C5"), ), - ), - defs.Link( - ltype=defs.LinkTypes.LinkedTo, - document=defs.Standard( - name="S1", - section="SE1", - subsection="SBE11", - hyperlink="https://example.com/S1", + ], + ), + defs.CRE( + id="555-555", + name="C5", + links=[ + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=defs.Standard( + name="S1", + section="SE1", + sectionID="id1", + hyperlink="https://example.com/S1", + ), ), - ), - ], - ), - "C6": defs.CRE( - id="666-666", - description="C6 description", - name="C6", - links=[ - defs.Link( - ltype=defs.LinkTypes.LinkedTo, - document=defs.Tool( - name="S2", - section="rule-0", - sectionID="0", - tooltype=defs.ToolTypes.Offensive, - description="SE2", - hyperlink="https://example.com/S2", + ], + ), + defs.CRE( + id="666-666", + name="C6", + ), + defs.CRE( + id="777-777", + name="C7", + ), + defs.CRE( + id="888-888", + name="C8", + ), + ], + defs.Credoctypes.Standard.value: [ + defs.Standard( + name="S1", + section="SE1", + description="", + sectionID="id1", + hyperlink="https://example.com/S1", + links=[ + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=defs.CRE(id="000-001", name="C0"), ), - ), - defs.Link( - ltype=defs.LinkTypes.LinkedTo, - document=defs.Standard( - name="S1", - section="SE1", - subsection="SBE11", - hyperlink="https://example.com/S1", + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=defs.CRE(id="444-444", name="C4"), ), - ), - defs.Link( - ltype=defs.LinkTypes.Contains, - document=defs.CRE(id="777-777", name="C7"), - ), - defs.Link( - ltype=defs.LinkTypes.Contains, - document=defs.CRE(id="888-888", name="C8"), - ), - ], - ), - "C7": defs.CRE( - id="777-777", - name="C7", - links=[ - defs.Link( - ltype=defs.LinkTypes.PartOf, - document=defs.CRE( - id="666-666", description="C6 description", name="C6" + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=defs.CRE(id="555-555", name="C5"), ), - ) - ], - ), - "C8": defs.CRE( - id="888-888", - name="C8", - links=[ - defs.Link( - ltype=defs.LinkTypes.PartOf, - document=defs.CRE( - id="666-666", description="C6 description", name="C6" + ], + ), + defs.Standard( + name="S3", + section="SE3", + description="SE3", + sectionID="5.3", + hyperlink="https://example.com/S3", + links=[ + defs.Link( + ltype=defs.LinkTypes.LinkedTo, + document=defs.CRE(id="333-333", name="C3"), ), - ) - ], - ), - "SL2:SSL2": defs.Standard( - name="SL2", - section="SSL2", - subsection="SBESL2", - hyperlink="https://example.com/SL2", - ), - "SL:SSL": defs.Standard( - name="SL", - section="SSL", - subsection="SBESL", - hyperlink="https://example.com/SL", - ), - "SLL:SSLL": defs.Standard( - name="SLL", - section="SSLL", - subsection="SBESLL", - hyperlink="https://example.com/SLL", - ), + ], + ), + defs.Standard( + name="SL", + section="SSL", + description="", + sectionID="slid", + hyperlink="https://example.com/SL", + ), + defs.Standard( + name="SL2", + section="SSL2", + sectionID="sl2id", + hyperlink="https://example.com/SL2", + ), + defs.Standard( + name="SLL", + description="", + section="SSLL", + sectionID="SBESLL", + ), + ], } return input_data, expected diff --git a/application/utils/spreadsheet_parsers.py b/application/utils/spreadsheet_parsers.py index f31a24a7..df501f18 100644 --- a/application/utils/spreadsheet_parsers.py +++ b/application/utils/spreadsheet_parsers.py @@ -1,3 +1,4 @@ +from pprint import pprint import logging import re from copy import copy @@ -139,41 +140,87 @@ def parse_export_format(lfile: List[Dict[str, Any]]) -> Dict[str, defs.Document] Given: a spreadsheet written by prepare_spreadsheet() return a list of CRE docs """ - - cres: Dict[str,defs.CRE] + cres: Dict[str, defs.CRE] = {} standards: Dict[str, defs.Standard] = {} max_internal_cre_links = len( set([k for k in lfile[0].keys() if k.startswith("CRE")]) ) standard_names = set( - [k.split("|") for k in lfile[0].keys() if not k.startswith("CRE")] + [k.split("|")[0] for k in lfile[0].keys() if not k.startswith("CRE")] ) + highest_cre = None + highest_index = max_internal_cre_links + 1 + + previous_cre = None + previous_index = max_internal_cre_links + 1 for mapping_line in lfile: working_cre = None working_standard = None # get highest numbered CRE entry - for i in range(max_internal_cre_links - 1, 0, -1): + for i in range(max_internal_cre_links - 1, -1, -1): if not is_empty(mapping_line.get(f"CRE {i}")): - entry = mapping_line.get(f"CRE {i}").split("|") - working_cre = defs.CRE(name=entry[1],id=entry[0]) + entry = mapping_line.get(f"CRE {i}").split(defs.ExportFormat.separator) + working_cre = defs.CRE(name=entry[1], id=entry[0]) + + if previous_index < i: # we found a higher hierarchy CRE + previous_index = i + highest_cre = previous_cre + cres[highest_cre.id] = highest_cre.add_link( + defs.Link( + document=working_cre.shallow_copy(), + ltype=defs.LinkTypes.Contains, + ) + ) + elif highest_index < i: # we found a higher hierarchy CRE + cres[highest_cre.id] = highest_cre.add_link( + defs.Link( + document=working_cre.shallow_copy(), + ltype=defs.LinkTypes.Contains, + ) + ) + elif highest_cre == None: + highest_cre = working_cre + highest_index = i + + previous_index = i + previous_cre = working_cre break for s in standard_names: - if mapping_line.get(f"{s}{defs.ExportFormat.separator}name"): + if not is_empty(mapping_line.get(f"{s}{defs.ExportFormat.separator}name")): + working_standard = defs.Standard( name=s, sectionID=mapping_line.get(f"{s}{defs.ExportFormat.separator}id"), section=mapping_line.get(f"{s}{defs.ExportFormat.separator}name"), - hyperlink=mapping_line.get(f"{s}{defs.ExportFormat.separator}hyperlink"), - description=mapping_line.get(f"{s}{defs.ExportFormat.separator}description") + hyperlink=mapping_line.get( + f"{s}{defs.ExportFormat.separator}hyperlink", "" + ), + description=mapping_line.get( + f"{s}{defs.ExportFormat.separator}description", "" + ), ) + if standards.get(working_standard.id): + working_standard = standards[working_standard.id] + if working_cre: - working_cre.add_link(defs.Link(document=working_standard)) + working_cre.add_link( + defs.Link( + document=working_standard.shallow_copy(), + ltype=defs.LinkTypes.LinkedTo, + ) + ) + working_standard.add_link( + defs.Link( + document=working_cre.shallow_copy(), + ltype=defs.LinkTypes.LinkedTo, + ) + ) standards[working_standard.id] = working_standard if working_cre: cres[working_cre.id] = working_cre - return cres.values(),standards.values() + return cres.values(), standards.values() @dataclass