fix: Correct the reviews importer's processing logic | NPG-7896 (#528)

# Description

- Fixed issues with the final step of processing reviews.
- Moved `questions` into the config of the reviews importer.
input-output-hk · Aug 24, 2023 · 58a673e
1 parent 685e814
commit 58a673e
Show file tree

Hide file tree

Showing 10 changed files with 364 additions and 244 deletions.
diff --git a/src/event-db/stage_data/dev/00002_fund10_ideascale_params.sql b/src/event-db/stage_data/dev/00002_fund10_ideascale_params.sql
@@ -5,9 +5,14 @@ INSERT INTO config (id, id2, id3, value) VALUES (
     '',
     '{  
         "group_id": 31051,
-        "review_stage_ids": [139],
+        "review_stage_ids": [143, 145],
         "nr_allocations": [30, 80],
         "campaign_group_id": 63,
+        "questions": {
+            "You are reviewing the positive IMPACT this project will have on the Cardano Ecosystem.\nHas this project clearly demonstrated in all aspects of the proposal that it will have a positive impact on the Cardano Ecosystem?": "Impact / Alignment",
+            "You are reviewing the FEASIBILITY of this project.\nIs this project feasible based on the proposal submitted? Does the plan and associated budget and milestones look achievable? Does the team have the skills, experience, capability and capacity to complete the project successfully?": "Feasibility",
+            "You are reviewing the VALUE FOR MONEY this represents for the Treasury and the Community\nIs the funding amount requested for this project reasonable and does it provide good Value for Money to the Treasury?": "Auditability"
+        },
         "stage_ids": [4590, 4596, 4602, 4608, 4614, 4620, 4626, 4632, 4638, 4644, 4650, 4656, 4662, 4591, 4597, 4603, 4609, 4615, 4621, 4627, 4633, 4639, 4645, 4651, 4657, 4663, 4592, 4598, 4604, 4610, 4616, 4622, 4628, 4634, 4640, 4646, 4652, 4658, 4664],
         "proposals": {
             "field_mappings": {

diff --git a/src/event-db/stage_data/testnet/00002_fund10_ideascale_params.sql b/src/event-db/stage_data/testnet/00002_fund10_ideascale_params.sql
@@ -5,9 +5,14 @@ INSERT INTO config (id, id2, id3, value) VALUES (
     '',
     '{
         "group_id": 31051,
-        "review_stage_ids": [139],
+        "review_stage_ids": [143, 145],
         "nr_allocations": [30, 80],
         "campaign_group_id": 63,
+        "questions": {
+            "You are reviewing the positive IMPACT this project will have on the Cardano Ecosystem.\nHas this project clearly demonstrated in all aspects of the proposal that it will have a positive impact on the Cardano Ecosystem?": "Impact / Alignment",
+            "You are reviewing the FEASIBILITY of this project.\nIs this project feasible based on the proposal submitted? Does the plan and associated budget and milestones look achievable? Does the team have the skills, experience, capability and capacity to complete the project successfully?": "Feasibility",
+            "You are reviewing the VALUE FOR MONEY this represents for the Treasury and the Community\nIs the funding amount requested for this project reasonable and does it provide good Value for Money to the Treasury?": "Auditability"
+        },
         "stage_ids": [4590, 4596, 4602, 4608, 4614, 4620, 4626, 4632, 4638, 4644, 4650, 4656, 4662, 4591, 4597, 4603, 4609, 4615, 4621, 4627, 4633, 4639, 4645, 4651, 4657, 4663, 4592, 4598, 4604, 4610, 4616, 4622, 4628, 4634, 4640, 4646, 4652, 4658, 4664],
         "proposals": {
             "field_mappings": {

diff --git a/utilities/ideascale-importer/ideascale_importer/reviews_importer/importer.py b/utilities/ideascale-importer/ideascale_importer/reviews_importer/importer.py
@@ -2,7 +2,7 @@
 import time
 from loguru import logger
 from dataclasses import dataclass
-from typing import List
+from typing import List, Dict
 import pydantic
 import tempfile
 
@@ -55,11 +55,13 @@ async def download_file(self, review_stage_id):
                     f = open(file_name, "wb")
                     f.write(content)
                     return file_name
+
         files = []
         for review_stage_id in review_stage_ids:
             # we are interested in only assessed reviews 
             files.append(await download_file(self, review_stage_id))
         return files
+
 class Importer:
     def __init__(
         self,
@@ -132,17 +134,16 @@ async def prepare_allocations(self):
 
     async def prepare_reviews(self):
         logger.info("Prepare proposal's reviews...")
-
-        for review in self.reviews:
-            await process_ideascale_reviews(
-                ideascale_xlsx_path=review,
-                ideascale_api_url=self.ideascale_url,
-                ideascale_api_key=self.api_token,
-                allocation_path=self.allocations_path,
-                challenges_group_id=self.config.campaign_group_id,
-                fund=self.event_id,
-                output_path=self.output_path
-            )
+        await process_ideascale_reviews(
+            ideascale_xlsx_path=self.reviews,
+            ideascale_api_url=self.ideascale_url,
+            ideascale_api_key=self.api_token,
+            allocation_path=self.allocations_path,
+            challenges_group_id=self.config.campaign_group_id,
+            questions=self.config.questions,
+            fund=self.event_id,
+            output_path=self.output_path
+        )
 
     async def import_reviews(self):
         logger.info("Import reviews into Event db")
@@ -172,6 +173,7 @@ class Config:
     review_stage_ids: List[int]
     stage_ids: List[int]
     nr_allocations: List[int]
+    questions: Dict[str, str]
 
     @staticmethod
     def from_json(val: dict):

diff --git a/utilities/ideascale-importer/ideascale_importer/reviews_importer/processing/prepare.py b/utilities/ideascale-importer/ideascale_importer/reviews_importer/processing/prepare.py
@@ -1,7 +1,7 @@
 """Set of commands for the preparation of the review stage."""
 import typer
 import csv
-from typing import List
+from typing import List, Dict
 
 from .tools.importer import Importer, IdeascaleImporter
 from .tools.allocator import Allocator
@@ -55,11 +55,12 @@ def nr_allocations_map():
     return await _allocate()
 
 async def process_ideascale_reviews(
-    ideascale_xlsx_path: str,
+    ideascale_xlsx_path: List[str],
     ideascale_api_key: str,
     ideascale_api_url: str,
     allocation_path: str,
     challenges_group_id: int,
+    questions: Dict[str, str],
     output_path: str,
     fund: int,
 ):
@@ -71,9 +72,13 @@ async def _process_ideascale_reviews():
         importer.load_allocations(allocation_path, fund)
         ideascale = IdeascaleImporter(ideascale_api_key, ideascale_api_url)
         await ideascale.import_challenges(challenges_group_id)
-        reviews = ideascale.raw_reviews_from_file(ideascale_xlsx_path)
-        reviews = ideascale.group_triplets(reviews)
-        importer.reviews = reviews
+        start_id = 1
+        for xlsx in ideascale_xlsx_path:
+            if len(importer.reviews) > 0:
+                start_id = importer.reviews[-1].id + 1
+            reviews = ideascale.raw_reviews_from_file(xlsx)
+            reviews = ideascale.group_triplets(reviews, questions, start_id=start_id)
+            importer.reviews = importer.reviews + reviews
 
         postprocessor.postprocess_reviews()
         postprocessor.export_reviews(postprocessor.data.reviews, f"{output_path}/postprocessed-reviews.csv")

diff --git a/...ities/ideascale-importer/ideascale_importer/reviews_importer/processing/tools/importer.py b/...ities/ideascale-importer/ideascale_importer/reviews_importer/processing/tools/importer.py
@@ -19,38 +19,15 @@ def __init__(self):
         self.reviews: List[models.Review] = []
         self.allocations: List[models.AllocationLight] = []
 
-    def load_proposals(self, path: str):
-        """Load a list of proposals from a json file."""
-        self.proposals = utils.load_json_and_serialize(path, models.Proposal)
-
-    def load_challenges(self, path: str):
-        """Load a list of challenges from a json file."""
-        self.challenges = utils.load_json_and_serialize(path, models.Challenge)
-
     def load_pas(self, path: str):
         """Load a list of PAs from a csv file."""
         self.pas = utils.load_csv_and_serialize(path, models.Pa, {"challenges": self.challenges})
 
-    def load_reviews(self, path: str, fund: int):
-        """Load a list of reviews from a csv file."""
-        self.reviews = self.reviews + utils.load_csv_and_serialize(path, models.Review, {"fund": fund})
 
     def load_allocations(self, path: str, fund: int):
         """Load a list of allocation from a csv file."""
         self.allocations = self.allocations + utils.load_csv_and_serialize(path, models.AllocationLight, {"fund": fund})
-
-    def prepare_reviews(self, criteria) -> tuple[List[str], List[dict]]:
-        """Prepare reviews as a list of texts and mappings given a set of criteria."""
-        notes: List[str] = []
-        mapping: List[dict] = []
-        for review in self.reviews:
-            for criterium in criteria:
-                notes.append(getattr(review, criterium))
-                mapping.append({"review": review, "criterium": criterium})
-
-        return notes, mapping
-
-
+
 class IdeascaleImporter:
     """Interface with IdeaScale API."""
 
@@ -59,49 +36,12 @@ def __init__(self, api_key: str, api_url: str = "https://temp-cardano-sandbox.id
         self.api_key = api_key
         self.api_url = api_url
         self.inner = utils.JsonHttpClient(self.api_url)
-        self.N_WORKERS = 6
+        self.N_WORKERS = 3
 
         self.challenges: List[models.Challenge] = []
         self.proposals: List[models.Proposal] = []
         self.pas: List[models.Pas] = []
-
-    def update_comrevs_emails(self, historic_pas: List[models.Pa] = []):
-
-        self.new_pas = []
-        async def wrapped():
-            tasks: asyncio.Queue = asyncio.Queue()
-            for pa in historic_pas:
-                tasks.put_nowait(self.get_user(pa))
-
-            async def worker():
-                while not tasks.empty():
-                    await tasks.get_nowait()
-
-            await asyncio.gather(*[worker() for _ in range(self.N_WORKERS)])
-
-        asyncio.run(wrapped())
-
-    async def get_user(self, pa: models.Pa):
-        try:
-            res = await self._get(f"/a/rest/v1/members/email/{pa.email}")
-            new_pa = {
-                'old': pa.email,
-                'new': res['email']
-            }
-            self.new_pas.append(new_pa)
-        except utils.GetFailed:
-            new_pa = {
-                'old': pa.email,
-                'new': ''
-            }
-            self.new_pas.append(new_pa)
-        except:
-            new_pa = {
-                'old': pa.email,
-                'new': ''
-            }
-            self.new_pas.append(new_pa)
-
+        self.review_stats = {}
 
     async def import_com_revs(
         self, group_id: int, page_size: int = 50, start_id: int = 0, historic_pas: List[models.Pa] = []
@@ -196,66 +136,18 @@ async def worker(d: WorkerData, stage_id: int):
                     d.done = True
         d = {}
         for stage_id in stage_ids: 
+            print(f"Starting {stage_id}")
             d = WorkerData(stage_id)
             worker_tasks = [asyncio.create_task(worker(d, stage_id)) for _ in range(self.N_WORKERS)]
             for task in worker_tasks:
                 await task
             self.proposals.extend(d.proposals)
 
-    def change_proposals_stage(self, target_stage: int):
-        """Change proposals stage."""
-
-        logger.info(f"Number of proposal to move: {len(self.proposals)}")
-
-        async def wrapped():
-            tasks: asyncio.Queue = asyncio.Queue()
-            for proposal in self.proposals:
-                tasks.put_nowait(self._change_proposal_stage(proposal, target_stage))
-
-            async def worker():
-                while not tasks.empty():
-                    await tasks.get_nowait()
-
-            await asyncio.gather(*[worker() for _ in range(self.N_WORKERS)])
-
-        asyncio.run(wrapped())
-
-    def change_proposals_campaign(self, target_campaign: int, target_stage: int):
-        """Change proposals stage."""
-
-        logger.info(f"Number of proposal to move: {len(self.proposals)}")
-
-        async def wrapped():
-            tasks: asyncio.Queue = asyncio.Queue()
-            for proposal in self.proposals:
-                tasks.put_nowait(self._change_proposal_campaign(proposal, target_campaign, target_stage))
-
-            async def worker():
-                while not tasks.empty():
-                    await tasks.get_nowait()
-
-            await asyncio.gather(*[worker() for _ in range(self.N_WORKERS)])
-
-        asyncio.run(wrapped())
-
-    async def _change_proposal_stage(self, proposal: models.Proposal, target_stage: int):
-        """Change proposal stage."""
-        await self._post(f"/a/rest/v1/ideas/{proposal.id}/changeStage/{target_stage}")
-
-    async def _change_proposal_campaign(self, proposal: models.Proposal, target_campaign: int, target_stage: int):
-        """Change proposal stage."""
-        await self._post(f"/a/rest/v1/ideas/{proposal.id}/changeCampaign/{target_campaign}/targetStage/{target_stage}")
-
     async def _get(self, path: str):
         """Execute a GET request."""
         headers = {"api_token": self.api_key}
         return await self.inner.get(path, headers)
 
-    async def _post(self, path: str, data: dict = None):
-        """Execute a POST request."""
-        headers = {"api_token": self.api_key}
-        return await self.inner.post(path, headers=headers)
-
     def transform_pas(self, reviewers: List[models.IdeascaleComRev], historic_pas: List[models.Pa], start_id: int = 0):
         """Merge historic reviewers with the new ones and assign level and challenges accordingly."""
         challenges_map = {}
@@ -299,7 +191,7 @@ def raw_reviews_from_file(self, path: str):
         _cache_results = list(_results)
         for r in reviews:
             _related = next(
-                (_r for _r in _cache_results if (_r.email == r.email and _r.idea_title == r.idea_title and _r.date == r.date)), None
+                (_r for _r in _cache_results if (_r.email == r.email and _r.idea_title == r.idea_title)), None
             )
             if _related is None:
                 logger.error("File malformed...")
@@ -309,16 +201,15 @@ def raw_reviews_from_file(self, path: str):
 
         return reviews
 
-    def group_triplets(self, _reviews):
+    def group_triplets(self, reviews, questions, start_id: int = 0):
         """Given a list of reviews divided by criteria, group them for the complete review."""
         groups = {}
-        questions = {
-            "This proposal effectively addresses the challenge": "Impact / Alignment",
-            "Given experience and plan presented it is highly likely this proposal will be implemented successfully": "Feasibility",
-            "The information provided is sufficient to audit the progress and the success of the proposal.": "Auditability",
-        }
+        _questions = {}
+        for q in questions:
+            _questions[q.replace('\n', '').replace('\r', '').replace(' ', '')] = questions[q]
+        questions = _questions
         logger.info("Group triplets...")
-        for review in _reviews:
+        for review in reviews:
             key = f"{review.idea_id}-{review.email}"
             if review.question in questions:
                 if key not in groups:
@@ -327,34 +218,27 @@ def group_triplets(self, _reviews):
 
         reviews = []
         logger.info("Parse reviews...")
-        _questions = list(questions.values())
+        questions = list(questions.values())
+        counter = 0
         for idx, g in enumerate(groups.keys()):
             if len(groups[g].keys()) == 3:
                 triplet = groups[g]
                 review_dict = {
-                    "id": idx,
-                    "Assessor": triplet[_questions[0]].email,
-                    "Impact / Alignment Note": triplet[_questions[0]].note,
-                    "Impact / Alignment Rating": triplet[_questions[0]].score,
-                    "Feasibility Note": triplet[_questions[1]].note,
-                    "Feasibility Rating": triplet[_questions[1]].score,
-                    "Auditability Note": triplet[_questions[2]].note,
-                    "Auditability Rating": triplet[_questions[2]].score,
+                    "id": counter + start_id,
+                    "Assessor": triplet[questions[0]].email,
+                    "Impact / Alignment Note": triplet[questions[0]].note,
+                    "Impact / Alignment Rating": triplet[questions[0]].score,
+                    "Feasibility Note": triplet[questions[1]].note,
+                    "Feasibility Rating": triplet[questions[1]].score,
+                    "Auditability Note": triplet[questions[2]].note,
+                    "Auditability Rating": triplet[questions[2]].score,
                 }
                 proposal_dict = {
-                    "id": triplet[_questions[0]].idea_id,
-                    "url": triplet[_questions[0]].idea_url,
-                    "title": triplet[_questions[0]].idea_title,
-                    "campaign_id": triplet[_questions[0]].idea_challenge.id,
+                    "id": triplet[questions[0]].idea_id,
+                    "url": triplet[questions[0]].idea_url,
+                    "title": triplet[questions[0]].idea_title,
+                    "campaign_id": triplet[questions[0]].idea_challenge.id,
                 }
                 reviews.append(models.Review(**review_dict, proposal=models.Proposal(**proposal_dict)))
+                counter = counter + 1
         return reviews
-
-    def extract_proposers_emails(self):
-        proposers_emails = []
-        public_emails = []
-        for proposal in self.proposals:
-            proposers_emails = proposers_emails + [{"email": a.email} for a in proposal.authors]
-            public_emails.append({"email": proposal.public_email})
-
-        return proposers_emails, public_emails