* add contrib feature in API
* add predict button on dashboard
* add contrib button on dashboard
AuHoh committed Jul 11, 2023
1 parent b96edf9 commit 23df9b4
Showing 6 changed files with 174 additions and 34 deletions.
24 changes: 22 additions & 2 deletions api/main.py
@@ -2,6 +2,7 @@
import pickle
import os
from smart_open import open
import shap

import pandas as pd
from fastapi import FastAPI, Body, HTTPException
@@ -13,10 +14,12 @@
with open(model_path, 'rb') as file:
model = pickle.load(file)

explainer = shap.TreeExplainer(model)

app = FastAPI()


@app.post("/", status_code=200)
@app.post("/predict", status_code=200)
def predict_credit(request: dict = Body(examples=[test_dict])):
df = pd.DataFrame([request])

@@ -34,14 +37,31 @@ def predict_credit(request: dict = Body(examples=[test_dict])):

results = {'credit_score_risk':
{'predict_proba': output_proba,
'predict_business_risk': output},
'predict_business_risk': output,
'predict_th_proba': th_proba},
'prediction': prediction
}

json_results = json.dumps(results)
return json_results


@app.post("/contrib", status_code=200)
def predict_contrib(request: dict = Body(examples=[test_dict])):
df = pd.DataFrame([request])

try:
shap_values = explainer.shap_values(df)
except Exception as e:
print(e)
raise HTTPException(status_code=400, detail=str(e))

results = {'shap_values': list(shap_values[1][0])}

json_results = json.dumps(results)
return json_results


if __name__ == "__main__":
import uvicorn

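
For context, a minimal client-side sketch of how the two routes could be called once the API is running. This is not part of the commit; it assumes a local server on http://localhost:8000 and reuses the test_dict payload from api/sample_call.py. Note that both endpoints return json.dumps(results), i.e. a JSON-encoded string, so the body still needs a json.loads after response.json():

import json
import requests

from sample_call import test_dict

base_url = "http://localhost:8000"

# /predict returns the score, the business decision and the decision threshold.
predict_response = requests.post(f"{base_url}/predict", json=test_dict)
predict_response.raise_for_status()
scores = json.loads(predict_response.json())
print(scores['credit_score_risk']['predict_proba'])

# /contrib returns one SHAP value per feature for the submitted record.
contrib_response = requests.post(f"{base_url}/contrib", json=test_dict)
contrib_response.raise_for_status()
contribs = json.loads(contrib_response.json())
print(len(contribs['shap_values']))
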
3 changes: 2 additions & 1 deletion api/requirements.txt
@@ -2,4 +2,5 @@ fastapi==0.99.1
pandas==1.5.3
httpx==0.24.1
smart-open==6.3.0
lightgbm==3.3.5
lightgbm==3.3.5
shap==0.40.0
3 changes: 1 addition & 2 deletions api/sample_call.py
@@ -1,5 +1,4 @@
test_dict = {"SK_ID_CURR": -1.388272000642093,
"CNT_CHILDREN": -0.5772200198290075,
test_dict = {"CNT_CHILDREN": -0.5772200198290075,
"AMT_INCOME_TOTAL": -0.5301826452722314,
"AMT_CREDIT": -1.088555397503703,
"AMT_ANNUITY": -1.0804966425436175,
17 changes: 14 additions & 3 deletions api/tests/test_api.py
@@ -2,14 +2,14 @@
import unittest

from fastapi.testclient import TestClient
from main import predict_credit, app
from main import predict_credit, app, predict_contrib
from sample_call import test_dict

client = TestClient(app)


class TestAPI(unittest.TestCase):
def test_if_api_call_request_is_correct(self):
def test_if_api_predict_call_request_is_correct(self):
# given
request = test_dict.copy()

@@ -26,10 +26,21 @@ def test_if_bad_api_call_request_returns_correct_status_code(self):

# when
api_response = client.post(
"/",
"/predict",
json=request,
)

# then
api_response_status_code_expected = 400
self.assertEqual(api_response_status_code_expected, api_response.status_code)

def test_if_api_contrib_call_request_is_correct(self):
# given
request = test_dict.copy()

# when
api_result = json.loads(predict_contrib(request))

# then
expected_size = 663
self.assertEqual(expected_size, len(api_result['shap_values']))
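
A possible complement (not in this commit) would be to exercise the new route through the TestClient as well, mirroring the /predict tests above; the expected length of 663 is taken from the direct-call test, and the extra json.loads reflects the fact that the endpoint returns a JSON-encoded string:

    def test_if_api_contrib_route_call_is_correct(self):
        # given
        request = test_dict.copy()

        # when
        api_response = client.post("/contrib", json=request)
        api_result = json.loads(api_response.json())

        # then
        self.assertEqual(200, api_response.status_code)
        self.assertEqual(663, len(api_result['shap_values']))
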
161 changes: 135 additions & 26 deletions dashboard/main.py
@@ -1,27 +1,67 @@
import json
import os
import pickle

import pandas as pd
import numpy as np
import requests
import streamlit as st
import matplotlib.pyplot as plt
import seaborn as sns

api_uri = os.getenv('API_URI', 'http://localhost:8000')

st.set_page_config(page_title="Prêt à dépenser")

st.title("Tableau d'évaluation des risques pour l'accord des crédits")


def request_prediction(api_uri, data):
headers = {"Content-Type": "application/json"}

response = requests.request(
method='POST', headers=headers, url=api_uri, json=data)

if response.status_code != 200:
raise Exception(
"Request failed with status {}, {}".format(response.status_code, response.text))

return response.json()


@st.cache_data
def get_std_scaler():
model_path = os.getenv('SCALER_PATH', 'scaler.pkl')
with open(model_path, 'rb') as file:
std_scaler = pickle.load(file)
return std_scaler


@st.cache_data
def get_data():
def get_data_test_predict():
path = '/Users/audreyhohmann/Documents/Formation/OCR/P7/X_test_full.parquet'
return pd.read_parquet(path)


df_predict = get_data_test_predict()


@st.cache_data
def get_data_test():
path = '/Users/audreyhohmann/Documents/Formation/OCR/P7/df_forstream.parquet'
return pd.read_parquet(path)
df = get_data()

#st.dataframe(df)

std_scaler = get_std_scaler()

df = get_data_test()

# st.dataframe(df)
ID_pret = st.selectbox('Choisir ID du prêt', df['SK_ID_CURR'], help='Filtrer sur les identifiants des crédits')

# Filtrer le dataframe pour l'ID sélectionné
filtered_df = df.loc[df['SK_ID_CURR'] == ID_pret]

predict_btn = st.button('Prédire')
st.write("Prédiction du modèle d'évaluation : ")
#st.metric(label="Score de prédiction", value=0.53, delta=value-th_proba)
filtered_df_predict = df_predict.loc[df_predict['SK_ID_CURR'] == ID_pret].reset_index(drop=True)

st.subheader('Modifications des valeurs des features pour calculer une nouvelle prédiction : ')
# Vérifier si des données correspondent à l'ID sélectionné
@@ -58,21 +98,88 @@ def get_data():
st.write(f"Montant des annuités : {credit}")
updated_annuity = st.slider("Variations des montants des annuités ('AMT_ANNUITY')", 0.0, 230000.0, annuity, 5000.0)

term = float("{:.2f}".format(filtered_df['CREDIT_TERM'].values[0]*100))
term = float("{:.2f}".format(filtered_df['CREDIT_TERM'].values[0] * 100))
st.write(f"Taux de paiement : {term}")
updated_term = st.slider("Variations du taux de paiement ", 0.0, 40.0, term, 1.0)

predict_btn = st.button('Prédire')
if predict_btn:
unscale_filtered_df_predict = pd.DataFrame(std_scaler.inverse_transform(
filtered_df_predict.drop(['SK_ID_CURR'], axis=1)),
columns=filtered_df_predict.drop(['SK_ID_CURR'], axis=1).columns)

unscale_filtered_df_predict['AMT_INCOME_TOTAL'] = revenu
unscale_filtered_df_predict['AMT_GOODS_PRICE'] = bien
unscale_filtered_df_predict['AMT_CREDIT'] = credit
unscale_filtered_df_predict['AMT_ANNUITY'] = annuity
unscale_filtered_df_predict['CREDIT_TERM'] = term

unscale_filtered_df_predict['CREDIT_INCOME_PERCENT'] = np.divide(updated_credit, updated_income)
unscale_filtered_df_predict['ANNUITY_INCOME_PERCENT'] = np.divide(updated_annuity, updated_income)
unscale_filtered_df_predict['CREDIT_TERM'] = np.divide(updated_annuity, updated_credit)
unscale_filtered_df_predict = unscale_filtered_df_predict.replace(np.inf, 0)
filtered_df_predict = pd.DataFrame(std_scaler.transform(unscale_filtered_df_predict),
columns=unscale_filtered_df_predict.columns)

response_result = json.loads(request_prediction(f'{api_uri}/predict',
filtered_df_predict.to_dict(orient='index')[0]))

st.write("Prédiction du modèle d'évaluation : ")
st.metric(label="Score de prédiction",
value=response_result['credit_score_risk']['predict_proba'],
delta=response_result['credit_score_risk']['predict_proba'] - response_result['credit_score_risk'][
'predict_th_proba'])

contrib_btn = st.button('Contribution des features')
if contrib_btn:
response_result = json.loads(request_prediction(f'{api_uri}/contrib',
filtered_df_predict.drop(['SK_ID_CURR'], axis=1).to_dict(orient='index')[0]))
shap_values = response_result['shap_values']
df_shap_values = pd.DataFrame([shap_values],
columns=filtered_df_predict.drop(['SK_ID_CURR'], axis=1).columns).T.reset_index(drop=False)
df_shap_values.columns = ['feature', 'shap_value']
df_shap_values = df_shap_values.sort_values('shap_value', ascending=False)

fig, ax = plt.subplots(figsize=(10, 8))
plt.bar(df_shap_values.head()['feature'], df_shap_values.head()['shap_value'])
st.pyplot(fig)


else:
st.write("Aucune donnée correspondante pour l'ID prêt sélectionné.")

predict_btn = st.button('Nouvelle prédiction')

def dowload_excel():
# Chemin du fichier Excel existant
col_des = '/Users/audreyhohmann/Documents/Formation/OCR/P7/colonnes_description.xlsx'

# Lecture du fichier Excel en tant que binaire
with open(col_des, 'rb') as fichier:
contenu = fichier.read()

# Téléchargement du fichier
st.download_button(
label='Télécharger la description des features du modèle',
data=contenu,
file_name='colonnes_description.xlsx',
mime='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
)


dowload_excel()


@st.cache_data
def get_data():
def get_data_train():
path_train = '/Users/audreyhohmann/Documents/Formation/OCR/P7/df_train_forstream.parquet'
return pd.read_parquet(path_train)
df_train = get_data()


df_train = get_data_train()

sns.set_theme(style="ticks", font='sans-serif', palette="Set2")


def plot_kde(df, feature_y):
# Créer la figure et les sous-graphiques
fig, ax = plt.subplots(figsize=(10, 8))
@@ -99,7 +206,6 @@ def plot_kde(df, feature_y):
# KDE plot des prêts non remboursés à temps (target == 1)
sns.kdeplot(df.loc[df['TARGET'] == 1, feature_y], color="red", label='crédit refusé', ax=ax)


# Ajout de la position du client
if selected_feature_y == 'AMT_INCOME_TOTAL':
plt.axvline(x=revenu, color='blue', linestyle='--', label='Position du client')
@@ -119,7 +225,6 @@ def plot_kde(df, feature_y):
ax.set_ylabel('Densité')
ax.set_title('Répartition des ' + feature_y, fontsize=22)


# Ajout de la légende
ax.legend()

@@ -130,19 +235,21 @@ def plot_kde(df, feature_y):
# Afficher le graphique dans Streamlit
st.pyplot(fig)


def plot_relplot(df, x_feature, y_feature, hue_op=None):
# Créer la figure et les sous-graphiques
fig, ax = plt.subplots(figsize=(10, 8))


if hue_op is None:
sns.scatterplot(data=df, x=x_feature, y=y_feature, color='olive', ax=ax)
elif hue_op == job:
sns.scatterplot(data=df.loc[df['OCCUPATION_TYPE'] == job], x=x_feature, y=y_feature, hue='OCCUPATION_TYPE', ax=ax)
sns.scatterplot(data=df.loc[df['OCCUPATION_TYPE'] == job], x=x_feature, y=y_feature, hue='OCCUPATION_TYPE',
ax=ax)
elif hue_op == genre:
sns.scatterplot(data=df.loc[df['CODE_GENDER'] == genre], x=x_feature, y=y_feature, hue='CODE_GENDER', ax=ax)
elif hue_op == family:
sns.scatterplot(data=df.loc[df['NAME_FAMILY_STATUS'] == family], x=x_feature, y=y_feature, hue='NAME_FAMILY_STATUS', ax=ax)
sns.scatterplot(data=df.loc[df['NAME_FAMILY_STATUS'] == family], x=x_feature, y=y_feature,
hue='NAME_FAMILY_STATUS', ax=ax)
else:
sns.scatterplot(data=df, x=x_feature, y=y_feature, hue=hue_op, ax=ax)

@@ -154,22 +261,22 @@ def plot_relplot(df, x_feature, y_feature, hue_op=None):
ax.legend()
# plt.tight_layout()

#ajout de la position du client
# ajout de la position du client
if x_feature == 'AMT_INCOME_TOTAL' and y_feature == 'AMT_CREDIT':
plt.scatter(revenu, credit, color='blue', marker='x', label='Position du client')
elif x_feature == 'AMT_INCOME_TOTAL' and y_feature == 'AMT_GOODS_PRICE':
plt.scatter(revenu, bien, color='blue', marker='x', label='Position du client')
elif x_feature == 'AMT_INCOME_TOTAL' and y_feature == 'AMT_ANNUITY':
plt.scatter(revenu, annuity, color='blue', marker='x', label='Position du client')


# Afficher le graphique dans Streamlit
st.pyplot(fig)


st.subheader("Distribution de la feature sélectionnée selon les classes du modèle d'entraînement")
# Widget selectbox pour choisir la feature y
selected_feature_y = st.selectbox('Choisir la feature', df_train[['AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_GOODS_PRICE', 'AMT_ANNUITY', 'DAYS_BIRTH', 'DAYS_EMPLOYED']].columns)
selected_feature_y = st.selectbox('Choisir la feature', df_train[
['AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_GOODS_PRICE', 'AMT_ANNUITY', 'DAYS_BIRTH', 'DAYS_EMPLOYED']].columns)

# Vérifier si des données correspondent à la feature sélectionnée
if selected_feature_y in df_train.columns:
@@ -179,17 +286,19 @@ def plot_relplot(df, x_feature, y_feature, hue_op=None):
else:
st.write("La feature sélectionnée n'est pas présente dans le dataframe.")

print(job)

st.subheader("Analyse bivariée entre les features quantitatives")
selected_feature_x_relplot = st.selectbox('Choisir la feature 1', df_train[['AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_GOODS_PRICE', 'AMT_ANNUITY']].columns, key="x_feature")
selected_feature_y_relplot = st.selectbox('Choisir la feature 2', df_train[['AMT_CREDIT', 'AMT_GOODS_PRICE', 'AMT_ANNUITY']].columns, key="y_feature")
selected_feature_hue = st.selectbox('Choisir la feature catégorielle (coloration des points)', ['None'] + df_train[['CODE_GENDER', 'OCCUPATION_TYPE', 'NAME_FAMILY_STATUS']].columns.tolist() + [genre] + [job] + [family], index=0, key="hue_feature")
selected_feature_x_relplot = st.selectbox('Choisir la feature 1', df_train[
['AMT_INCOME_TOTAL', 'AMT_CREDIT', 'AMT_GOODS_PRICE', 'AMT_ANNUITY']].columns, key="x_feature")
selected_feature_y_relplot = st.selectbox('Choisir la feature 2',
df_train[['AMT_CREDIT', 'AMT_GOODS_PRICE', 'AMT_ANNUITY']].columns,
key="y_feature")
selected_feature_hue = st.selectbox('Choisir la feature catégorielle (coloration des points)', ['None'] + df_train[
['CODE_GENDER', 'OCCUPATION_TYPE', 'NAME_FAMILY_STATUS']].columns.tolist() + [genre] + [job] + [family], index=0,
key="hue_feature")

if selected_feature_hue == 'None':
hue_op = None
else:
hue_op = selected_feature_hue


plot_relplot(df_train, selected_feature_x_relplot, selected_feature_y_relplot, hue_op)
plot_relplot(df_train, selected_feature_x_relplot, selected_feature_y_relplot, hue_op)
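
For reference, the prediction branch above relies on an unscale / edit / rescale round trip: the stored row is brought back to raw units with the pickled scaler, the user-edited amounts and derived ratios are written in, and the row is re-scaled before being posted to the API. A standalone sketch of that pattern with scikit-learn's StandardScaler (toy values and illustrative column names, not the project's data):

import pandas as pd
from sklearn.preprocessing import StandardScaler

# Toy training frame; column names are illustrative only.
train = pd.DataFrame({"AMT_CREDIT": [100000.0, 250000.0, 400000.0],
                      "AMT_ANNUITY": [5000.0, 12000.0, 20000.0],
                      "CREDIT_TERM": [0.050, 0.048, 0.050]})
scaler = StandardScaler().fit(train)

# One scaled row, as it would come out of the stored parquet file.
scaled_row = pd.DataFrame(scaler.transform(train.iloc[[0]]), columns=train.columns)

# Back to raw units, apply the user's edit, recompute the derived ratio.
raw_row = pd.DataFrame(scaler.inverse_transform(scaled_row), columns=scaled_row.columns)
raw_row["AMT_ANNUITY"] = 8000.0
raw_row["CREDIT_TERM"] = raw_row["AMT_ANNUITY"] / raw_row["AMT_CREDIT"]

# Re-scaled row, ready to be sent to the /predict endpoint.
rescaled_row = pd.DataFrame(scaler.transform(raw_row), columns=raw_row.columns)
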
Binary file added dashboard/scaler.pkl
