Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(general): Add an option to pick between native OpenAI and Azure OpenAI #5569

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
6a87a40
Azure OpenAI
Sep 14, 2023
58351d2
CLI Command Reference updated as well
Sep 14, 2023
f4057ae
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 14, 2023
a166987
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 15, 2023
14a723a
fix linter and static type issues
Sep 15, 2023
02b7bbf
unittests added for '_should_run'
Sep 15, 2023
9b5dbc9
testing correct and missing configurations
Sep 15, 2023
e8cdd37
line added - to adjust code with 'main' version
Sep 15, 2023
44be5a8
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 18, 2023
e1ae799
minimize unittest files for Azure OpenAI
Sep 18, 2023
d91b94e
typos in documentation fixed
Sep 18, 2023
c8101c7
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 19, 2023
71ccce6
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 19, 2023
442b9c4
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 20, 2023
70d3673
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 20, 2023
ed54de3
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 21, 2023
c6d7d83
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 21, 2023
97f5fd3
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 22, 2023
1e8d3b5
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 26, 2023
1a9a35a
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 27, 2023
61e06b5
Merge branch 'main' into feat/AzureOpenAI
lif2 Sep 28, 2023
972dd4c
Merge branch 'main' into feat/AzureOpenAI
lif2 Oct 2, 2023
cc809dd
Merge branch 'main' into feat/AzureOpenAI
lif2 Oct 10, 2023
b7cbeb5
Merge branch 'main' into feat/AzureOpenAI
lif2 Oct 15, 2023
e00412f
Merge branch 'main' into feat/AzureOpenAI
lif2 Oct 15, 2023
5e6c7c9
Merge branch 'main' into feat/AzureOpenAI
lif2 Oct 17, 2023
629d717
Merge branch 'main' into feat/AzureOpenAI
lif2 Oct 18, 2023
c1d5d62
Merge branch 'main' into feat/AzureOpenAI
lif2 Oct 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 56 additions & 19 deletions checkov/common/output/ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from checkov.common.output.record import Record
from typing_extensions import Self

# Common OpenAI environment variables
OPENAI_MAX_FINDINGS = int(os.getenv("CKV_OPENAI_MAX_FINDINGS", 5))
OPENAI_MAX_TOKENS = int(os.getenv("CKV_OPENAI_MAX_TOKENS", 512))
OPENAI_MODEL = os.getenv("CKV_OPENAI_MODEL", "gpt-3.5-turbo")
Expand All @@ -29,10 +30,32 @@
class OpenAi:
_instance = None # noqa: CCE003 # singleton

def __new__(cls, api_key: str | None = None) -> Self:
def _validate_azure_env(self, value: str | None = None, environment_variable_name: str | None = None) -> bool:
if (value is None):
print(
colored(
f"ERROR: Configuration for Azure OpenAI is missing: Please specify {environment_variable_name} environment variable for --openai-api-type '{self._api_type}' type.",
"red",
)
)
Comment on lines +35 to +40
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just use a normal log message

return False
return True

def __new__(cls, api_key: str | None = None, api_type: str = "default") -> Self:
if cls._instance is None:
cls._instance = super().__new__(cls)
cls._should_run = True if api_key else False
cls._api_type = api_type.lower()
if (cls._api_type == 'azure'):
Comment on lines +48 to +49
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no need to create a class attribute for this, when you set it on openai a few lines later

cls.AZURE_OPENAI_API_ENDPOINT = os.getenv("CKV_AZURE_OPENAI_API_ENDPOINT", None)
cls.AZURE_OPENAI_API_VERSION = os.getenv("CKV_AZURE_OPENAI_API_VERSION", '2023-05-15')
cls.AZURE_OPENAI_DEPLOYMENT_NAME = os.getenv("CKV_AZURE_OPENAI_DEPLOYMENT_NAME", None)
cls._should_run = cls._should_run & cls._validate_azure_env(cls._instance, cls.AZURE_OPENAI_API_ENDPOINT, 'CKV_AZURE_OPENAI_API_ENDPOINT')
cls._should_run = cls._should_run & cls._validate_azure_env(cls._instance, cls.AZURE_OPENAI_API_VERSION, 'CKV_AZURE_OPENAI_API_VERSION')
cls._should_run = cls._should_run & cls._validate_azure_env(cls._instance, cls.AZURE_OPENAI_DEPLOYMENT_NAME, 'CKV_AZURE_OPENAI_DEPLOYMENT_NAME')
Comment on lines +53 to +55
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of validating each one by one, pass all to a validate function and decide, if everything is set as needed

openai.api_type = cls._api_type
openai.api_base = cls.AZURE_OPENAI_API_ENDPOINT if cls.AZURE_OPENAI_API_ENDPOINT is not None else ""
openai.api_version = cls.AZURE_OPENAI_API_VERSION
openai.api_key = api_key

return cls._instance
Expand Down Expand Up @@ -70,24 +93,38 @@ async def _chat_complete(self, record: Record) -> None:
return

try:
completion = await openai.ChatCompletion.acreate( # type:ignore[no-untyped-call]
model=OPENAI_MODEL,
messages=[
{"role": "system", "content": "You are a security tool"},
{
"role": "user",
"content": "".join(
[
f"fix following code, which violates checkov policy '{record.check_name}':\n",
*[line for _, line in record.code_block],
]
),
},
{"role": "user", "content": "Explain"},
],
temperature=0,
max_tokens=OPENAI_MAX_TOKENS,
)
# define common messages array
messages = [
{"role": "system", "content": "You are a security tool"},
{
"role": "user",
"content": "".join(
[
f"fix following code, which violates checkov policy '{record.check_name}':\n",
*[line for _, line in record.code_block],
]
),
},
{"role": "user", "content": "Explain"},
],
# depends on api_type, call ChatCompletion differently
logging.info(f"[_chat_complete]: self._api_type: {self._api_type}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instead of logging it here, just do it once at the end of the class instance creation, then you can also log other infos like deployment name and endpoint used.

if (self._api_type == 'azure'):
completion = await openai.ChatCompletion.acreate( # type:ignore[no-untyped-call]
engine=self.AZURE_OPENAI_DEPLOYMENT_NAME,
messages=messages[0],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
messages=messages[0],
messages=messages,

otherwise you only send the first message, same a few lines lower

temperature=0,
max_tokens=OPENAI_MAX_TOKENS,
)
else:
completion = await openai.ChatCompletion.acreate( # type:ignore[no-untyped-call]
model=OPENAI_MODEL,
messages=messages[0],
temperature=0,
max_tokens=OPENAI_MAX_TOKENS,
)

logging.info(f"[COMPLETION]{completion}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
logging.info(f"[COMPLETION]{completion}")
logging.debug(f"OpenAI request returned: {completion}")

or something similar to have a direct context. Also debug level is more than enough.

logging.info(f"OpenAI request consumed {completion.usage.total_tokens} tokens")

details = self._parse_completion_response(completion_content=completion.choices[0].message.content)
Expand Down
3 changes: 2 additions & 1 deletion checkov/common/output/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ def print_console(
use_bc_ids: bool = False,
summary_position: str = 'top',
openai_api_key: str | None = None,
openai_api_type: str = "default",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove all of the openai_api_type parameter changes and change it to an env var with a default of default

) -> str:
summary = self.get_summary()
output_data = colored(f"{self.check_type} scan results:\n", "blue")
Expand Down Expand Up @@ -305,7 +306,7 @@ def print_console(
for record in self.passed_checks:
output_data += record.to_string(compact=is_compact, use_bc_ids=use_bc_ids)
if self.failed_checks:
OpenAi(api_key=openai_api_key).enhance_records(runner_type=self.check_type, records=self.failed_checks)
OpenAi(api_key=openai_api_key, api_type=openai_api_type).enhance_records(runner_type=self.check_type, records=self.failed_checks)
for record in self.failed_checks:
output_data += record.to_string(compact=is_compact, use_bc_ids=use_bc_ids)
if not is_quiet:
Expand Down
1 change: 1 addition & 0 deletions checkov/common/runners/runner_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ def print_reports(
use_bc_ids=config.output_bc_ids,
summary_position=config.summary_position,
openai_api_key=config.openai_api_key,
openai_api_type=config.openai_api_type,
)

self._print_to_console(
Expand Down
12 changes: 12 additions & 0 deletions checkov/common/util/ext_argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,3 +549,15 @@ def add_parser_args(self) -> None:
"resource code to OpenAI to request remediation guidance. This will use your OpenAI credits. "
"Set your number of findings that will receive enhanced guidelines using CKV_OPENAI_MAX_FINDINGS",
)
self.add(
"--openai-api-type",
env_var="CKV_OPENAI_API_TYPE",
choices=['azure', 'default'],
default='default',
help="By switching this flag to 'azure', you are able to send violated policies to Azure OpenAI service. "
"You have to provide Key, either with --openai-api-key or CKV_OPENAI_API_KEY. "
"Before you switch to 'azure' you also have to define CKV_AZURE_OPENAI_API_ENDPOINT and CKV_AZURE_OPENAI_DEPLOYMENT_NAME environment variables. "
"CKV_AZURE_OPENAI_API_ENDPOINT - is the Azure OpenAI service endpoint, eg: 'https://eastus.api.cognitive.microsoft.com/'. "
"CKV_AZURE_OPENAI_DEPLOYMENT_NAME - this is your Deployment Name from https://oai.portal.com portal you wish to use. "
"With CKV_AZURE_OPENAI_API_VERSION environment variable you are also able to control API version - this defaults to '2023-05-15'. ",
)
Comment on lines +552 to +563
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it doesn't make sense to add a flag for it, because you need multiple other settings, which are set via env vars to make it work

1 change: 1 addition & 0 deletions docs/2.Basics/CLI Command Reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,5 @@ nav_order: 2
| `--scan-secrets-history` | Enable secret scan history of commits |
| `--block-list-secret-scan CKV_SECRETS_SCAN_BLOCK_LIST` | List of files to filter out in the secret scanner |
| `--support` | Enable debug logs and upload the logs to the server. Requires a Bridgecrew or Prisma Cloud API key. |
| `--openai-api-type` | By switching this flag to 'azure', you are able to send violated policies to the Azure OpenAI service. You have to provide a key, either with --openai-api-key or CKV_OPENAI_API_KEY. Before you switch to 'azure' you also have to define the CKV_AZURE_OPENAI_API_ENDPOINT and CKV_AZURE_OPENAI_DEPLOYMENT_NAME environment variables. CKV_AZURE_OPENAI_API_ENDPOINT - is the Azure OpenAI service endpoint, eg: 'https://eastus.api.cognitive.microsoft.com/'. CKV_AZURE_OPENAI_DEPLOYMENT_NAME - this is the Deployment Name from the https://oai.azure.com portal you wish to use. With the CKV_AZURE_OPENAI_API_VERSION environment variable you are also able to control the API version - this defaults to '2023-05-15'. |
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove

| `--openai-api-key` | Add an OpenAI API key to enhance finding guidelines by sending violated policies and resource code to OpenAI to request remediation guidance. This will use your OpenAI credits. Set your number of findings that will receive enhanced guidelines using CKV_OPENAI_MAX_FINDINGS |
20 changes: 20 additions & 0 deletions docs/4.Integrations/OpenAI.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ Check: CKV_AWS_16: "Ensure all data stored in the RDS is securely encrypted at r
Passed checks: 1, Failed checks: 1, Skipped checks: 0
```

## OpenAI type

With the `--openai-api-type` flag, you can choose the OpenAI source. Possible values are `default` and `azure`. The default value is `default`, which sends your requests to the publicly available OpenAI service. By setting `--openai-api-type azure` you can query an **Azure OpenAI** resource using an Azure OpenAI key. Please see the [Azure OpenAI] section below for more configuration details.

Comment on lines +97 to +100
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

move this to the Settings block as an env var

## Settings

Following environment variables can be used to fine tune the amount of AI generated guidelines.
Expand All @@ -104,3 +108,19 @@ Following environment variables can be used to fine tune the amount of AI genera
| CKV_OPENAI_MAX_FINDINGS | 5 | Amount of findings per framework to add enhanced guidelines. |
| CKV_OPENAI_MAX_TOKENS | 512 | Maximum number of tokens to generate in the chat completion. |
| CKV_OPENAI_MODEL | gpt-3.5-turbo | ID of the chat completion model to use. |


## Azure OpenAI

Azure OpenAI reuses the following environment variables from the 'default' OpenAI configuration:
- CKV_OPENAI_API_KEY
- CKV_OPENAI_MAX_FINDINGS
- CKV_OPENAI_MAX_TOKENS

To use Azure OpenAI you have to define the following environment variables:

| Environment variable | Default | Info |
|------------------------------------|---------------|--------------------------------------------------------------|
| CKV_AZURE_OPENAI_API_ENDPOINT | | Azure OpenAI Endpoint format. Eg.: 'https://eastus.api.cognitive.microsoft.com/'|
| CKV_AZURE_OPENAI_API_VERSION | 2023-05-15 | Azure OpenAI API version to use. |
| CKV_AZURE_OPENAI_DEPLOYMENT_NAME   |               | Deployment Name of the chat completion model to use. Note that the deployment must already exist in the https://oai.azure.com portal. |
23 changes: 23 additions & 0 deletions tests/common/output/test_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from checkov.common.output.ai import OpenAi

import os
from unittest import mock

def test_parse_completion_response():
# given
Expand Down Expand Up @@ -89,3 +91,24 @@ def test_parse_completion_response():
"",
"With this change, the RDS instance will be encrypted at rest, and will comply with the checkov policy.",
]

@mock.patch.dict(os.environ, {}, clear=True)
def test_azure_openai_missing_configuration():
    """The Azure API type must disable the OpenAI run when its settings are absent.

    ``clear=True`` empties ``os.environ`` for the duration of the test, so
    CKV_AZURE_OPENAI_* variables exported on the host cannot leak in and make
    the configuration look complete.
    """
    # given
    api_type_selected = 'azure'
    OpenAi._instance = None  # reset the singleton so __new__ re-evaluates the environment

    # when
    openai = OpenAi(api_key='not_a_real_key', api_type=api_type_selected)

    # then
    assert openai._should_run is False

@mock.patch.dict(
    os.environ,
    {
        'CKV_AZURE_OPENAI_API_ENDPOINT': "https://eastus.api.cognitive.microsoft.com/",
        "CKV_AZURE_OPENAI_API_VERSION": "2023-05-15",
        "CKV_AZURE_OPENAI_DEPLOYMENT_NAME": "gpt-4",
    },
)
def test_azure_openai_type_is_set_correctly():
    """The API type passed to ``OpenAi`` is stored on the created instance."""
    # given
    OpenAi._instance = None  # drop any cached singleton so a fresh instance is built
    expected_api_type = 'azure'

    # when
    client = OpenAi(api_key='not_a_real_key', api_type=expected_api_type)

    # then
    assert client._api_type == expected_api_type

@mock.patch.dict(
    os.environ,
    {
        'CKV_AZURE_OPENAI_API_ENDPOINT': "https://eastus.api.cognitive.microsoft.com/",
        "CKV_AZURE_OPENAI_API_VERSION": "2023-05-15",
        "CKV_AZURE_OPENAI_DEPLOYMENT_NAME": "gpt-4",
    },
)
def test_azure_openai_correct_configuration():
    """With an API key and all Azure settings present, the OpenAI run stays enabled."""
    # given
    api_type_selected = 'azure'
    OpenAi._instance = None  # reset the singleton so __new__ re-evaluates the environment

    # when
    openai = OpenAi(api_key='not_a_real_key', api_type=api_type_selected)

    # then
    # identity check instead of `== True`: also verifies the flag is a real bool
    assert openai._should_run is True
1 change: 1 addition & 0 deletions tests/common/runner_registry/test_runner_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,6 +725,7 @@ def test_output_file_path_with_output_mapping(tmp_path: Path, capsys: CaptureFix
output_bc_ids=False,
summary_position="top",
openai_api_key=None,
openai_api_type="default",
)

# when
Expand Down
Loading