Merge pull request #37 from UiPath/alpha

Additional models to production

mohitseth-nitrr committed Oct 27, 2020
2 parents 549a073 + 3c83a46 · commit 8701a96

Showing 9 changed files with 171 additions and 35 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
*.iml
77 changes: 43 additions & 34 deletions gpu/install_gpu_drivers.sh
@@ -1,39 +1,48 @@
#!/bin/bash

-function install_gpu() {
+install_gpu() {

# Set cuda nvidia repository
curl -O https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.0.130-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu1804_10.0.130-1_amd64.deb
sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub

-sudo apt update
-sudo apt-get -y install cuda
+# install cuda
+sudo apt update
+sudo apt-get -y install cuda

echo "################################# Validate NVIDIA-SMI #####################################"
nvidia-smi
nvidia_result=$(nvidia-smi | grep "Tesla K80")
if [[ "$nvidia_result" == *"Tesla K80"* ]]; then
agree1="Y"
fi
if [ "$agree1" == "Y" ] ; then
echo "################################# Validate NVIDIA-SMI #####################################"
nvidia-smi
nvidia_version=$(nvidia-smi | grep NVIDIA-SMI | grep CUDA)

if [[ "${nvidia_version}" == *"CUDA Version"* && "${nvidia_version}" == *"Driver Version"* ]]; then

# install nvidia-docker based on distribution
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | sudo tee /etc/apt/sources.list.d/nvidia-docker.list
sudo apt-get update && sudo apt-get install -y nvidia-docker2
-# edit json
-edit_daemon_json

+# update docker daemon json with nvidia runtime
+set_daemon_json
sudo systemctl restart docker
sleep 2
-validate_kubectl_up

sleep 4

# Add kubernetes daemon to add connectivity to nvidia
+validate_kubectl_command
kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/1.0.0-beta6/nvidia-device-plugin.yml

# Validate that gpu driver installation & integration is successful
validate_gpu_updated
echo "################################# Nvidia integration succeeded #####################################"
else
echo "################ GPU driver installation failed ###################"
exit
fi
}

-function edit_daemon_json(){
+set_daemon_json(){
echo "################## Updating docker configuration ######################"
sudo bash -c '
echo \
@@ -49,35 +58,35 @@ echo \

}

-function validate_kubectl_up() {
+validate_kubectl_command() {
count=0
while [ $count -lt 50 ]; do
result=$(kubectl get nodes| grep master)
if [[ "$result" == *"master"* ]]; then
echo "kubectl up after " $((count * 5)) "seconds"
echo "kubectl command ran successfully in " $((count * 5)) "seconds"
break
else
echo "kubectl not up, retry : " $count
echo "Not able to run kubectl command, retry : " $count
count=$(( $count + 1 ))
sleep 5
fi
done

if [ $count == 50 ]; then
echo "Kubectl Failed to come up"
echo "Failed to run kubectl command. Please check if kubernetes context is properly set and available to logged in user."
swapoff -a
exit
fi
}

-function validate_gpu_updated() {
+validate_gpu_updated() {
count=0
while [ $count -lt 50 ]; do
result=$(kubectl describe nodes| grep nvidia.com/gpu)
if [[ "$result" == *"nvidia.com/gpu"* ]]; then
echo $result
echo "Node gpu info updated after " $((count * 5)) "seconds"
echo "##################### Successfully installed GPU #########################"
echo "##################### Successfully integrated GPU with kubernetes #########################"
break
else
echo "kubectl gpu info not updated, retry : " $count
@@ -87,23 +96,23 @@ function validate_gpu_updated() {
done

if [ $count == 50 ]; then
echo "################## Failed to install gpu ####################"
echo "################## Failed to integrate with gpu ####################"
swapoff -a
exit
fi
}

+main() {

# check if GPU is attached to disk
-check_nvidia=$(sudo lshw -C display|grep NVIDIA)

+sudo lshw -C display
+check_nvidia=$(sudo lshw -C display|grep NVIDIA)
-if [[ "$check_nvidia" == *"NVIDIA"* ]]; then
-agree="Y"
-fi
+if [[ "$check_nvidia" == *"NVIDIA"* ]]; then
+install_gpu
+else
+echo "####### GPU not installed in the setup ###########"
+exit
+fi
+}

if [ "$agree" == "Y" ]
then
install_gpu
else
echo "####### GPU not installed in the setup ###########"
exit
fi
main
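Note: as a quick sanity check of what this script sets up, the sketch below mirrors its validation steps by hand. It assumes a working kubectl context on the node; the pod name and CUDA image tag are illustrative, and the daemon.json shown in a comment is the stock nvidia-docker2 configuration, not necessarily the exact body collapsed out of this diff.

# Minimal smoke test for the driver + nvidia-docker + device-plugin chain.

# 1. Driver is loaded (the script greps this output for "CUDA Version" / "Driver Version"):
nvidia-smi

# 2. Docker picked up the runtime written by set_daemon_json. The stock
#    nvidia-docker2 /etc/docker/daemon.json is typically:
#      {"default-runtime": "nvidia",
#       "runtimes": {"nvidia": {"path": "nvidia-container-runtime", "runtimeArgs": []}}}
docker info | grep -i runtime

# 3. The device plugin registered the GPU resource (same grep as validate_gpu_updated):
kubectl describe nodes | grep nvidia.com/gpu

# 4. A throwaway pod that requests one GPU and prints the nvidia-smi table:
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: Pod
metadata:
  name: gpu-smoke-test
spec:
  restartPolicy: Never
  containers:
  - name: cuda
    image: nvidia/cuda:10.0-base
    command: ["nvidia-smi"]
    resources:
      limits:
        nvidia.com/gpu: 1
EOF
kubectl logs gpu-smoke-test   # once the pod completes, this should print the nvidia-smi table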
21 changes: 21 additions & 0 deletions metadata/ComputerVision__1__metadata.json
@@ -0,0 +1,21 @@
{
"changeLog": "Release v2020.10",
"cpu": 0,
"description": "Backend server for the Computer Vision solution that detects User Interface Elements from a provided Application screenshot",
"displayName": "ComputerVision",
"gpu": 0,
"inputDescription": "Please use the Computer Vision Activities included in the UIAutomation package. For more information please visit the official documentation.",
"inputType": "JSON",
"memory": 0,
"mlPackageLanguage": "PYTHON36_CV",
"name": "ComputerVision",
"outputDescription": "Please use the Computer Vision Activities included in the UIAutomation package. For more information please visit the official documentation.",
"processorType": "GPU",
"projectId": "[project-id]",
"retrainable": false,
"stagingUri": "[staging-uri]",
"projectName": "UiPath Document Understanding",
"projectDescription": "UiPath models to classify and extract information from images and pdfs.",
"tenantName": "UiPath",
"imagePath": "registry.replicated.com/aif-core/computervision:1"
}
21 changes: 21 additions & 0 deletions metadata/EnglishTextClassification__1__metadata.json
@@ -0,0 +1,21 @@
{
"changeLog": "",
"cpu": 0,
"description": "This is the preview version of a generic, retrainable model for English Classification. This ML Package must be retrained, if deployed without training first, deployment will fail with an error stating that the model is not trained. This model is a deep learning architecture for language classification. It is based on RoBERTa, a self-supervised method for pretraining natural language processing systems. A GPU can be used both at serving time and training time. A GPU delivers ~5-10x improvement in speed. The original paper can be found here https://arxiv.org/abs/1907.11692 by Yinhan Liu, Myle Ott et al. The model was open-sourced by Facebook AI Research.",
"displayName": "EnglishTextClassification",
"gpu": 0,
"inputDescription": "Text to be classified as String: 'I loved this movie.'",
"inputType": "JSON",
"memory": 0,
"mlPackageLanguage": "PYTHON36",
"name": "EnglishTextClassification",
"outputDescription": "JSON with pedicted class name, associated confidence on that class prediction (between 0-1). For example: {\"prediction\": \"Positive\", \"confidence\": 0.9422031841278076,}",
"processorType": "GPU",
"retrainable": true,
"stagingUri": "[staging-uri]",
"projectId": "[project-id]",
"projectName": "Language Analysis",
"projectDescription": "Models for analyzing text including language detection, sentiment analysis, and named-entity recognition.",
"tenantName": "Open-Source Packages",
"imagePath": "registry.replicated.com/aif-core/englishtextclassification:1"
}
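For illustration only — the endpoint URL and route below are placeholders, not a documented AI Fabric API surface — a deployed ML Skill built from this package would be exercised roughly like this, per the input/output descriptions above:

# Hypothetical ML Skill endpoint; substitute the URL of your deployed skill.
curl -X POST "https://<aifabric-host>/<english-text-classification-skill>/predict" \
  -H "Content-Type: application/json" \
  -d '"I loved this movie."'
# Expected response shape, per outputDescription:
# {"prediction": "Positive", "confidence": 0.9422031841278076}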
2 changes: 1 addition & 1 deletion metadata/HandwritingRecognition__1__metadata.json
@@ -1,5 +1,5 @@
{
"changeLog": "Release v2020.8",
"changeLog": "Release v2020.10",
"cpu": 0,
"description": "This Package provides the Handwriting recognition capability required by the Intelligent Form Extractor Package. Deploying this ML Package is required prior to deploying the Intelligent Form Extractor Package. For detailed instructions about the steps required to correctly configure and deploy the Intelligent Form Extractor, see the Out-of-the-box Packages documentation for AI Fabric here: https://docs.uipath.com/ai-fabric/docs/uipath-document-understanding",
"displayName": "HandwritingRecognition",
21 changes: 21 additions & 0 deletions metadata/IntelligentKeywordClassifier__1__metadata.json
@@ -0,0 +1,21 @@
{
"changeLog": "Release v2020.9",
"cpu": 0,
"description": "This Package provides the Endpoint required by the Intelligent Keyword Classifier activity. Please see more details in the Intelligent Keyword Classifier activity documentation here: https://docs.uipath.com/activities/docs/intelligent-keyword-classifier",
"displayName": "IntelligentKeywordClassifier",
"gpu": 0,
"inputDescription": "ML Skills deployed using this package are queried for the Intelligent Keyword Classifier Activity. To learn how to use the Intelligent Keyword Classifier, visit its documentation: https://docs.uipath.com/activities/docs/intelligent-keyword-classifier .",
"inputType": "JSON",
"memory": 0,
"mlPackageLanguage": "PYTHON37_DU",
"name": "IntelligentKeywordClassifier",
"outputDescription": "Please refer to the documentation of the Intelligent Keyword Classifier Activity.",
"processorType": "CPU",
"projectId": "[project-id]",
"retrainable": false,
"stagingUri": "[staging-uri]",
"projectName": "UiPath Document Understanding",
"projectDescription": "UiPath models to classify and extract information from images and pdfs.",
"tenantName": "UiPath",
"imagePath": "registry.replicated.com/aif-core/intelligentkeywordclassifier:1"
}
21 changes: 21 additions & 0 deletions metadata/TPOTAutoMLClassification__1__metadata.json
@@ -0,0 +1,21 @@
{
"changeLog": "",
"cpu": 0,
"description":"Please train this ML Package before deploying it as it will not return anything otherwise.   \n\nTPOT is a Python Automated Machine Learning tool that optimizes machine learning pipelines using genetic programming. TPOT will automate the most tedious part of machine learning by intelligently exploring thousands of possible pipelines to find the best one for your data. Once TPOT is finished searching (or you get tired of waiting), it provides you with the Python code for the best pipeline it found so you can tinker with the pipeline from there. TPOT is built on top of scikit-learn, so all the code it generates should look familiar to scikit-learn users.   \n\nThe model is based on a publication entitled \"Scaling tree-based automated machine learning to biomedical big data with a feature set selector.\" from Trang T. Le, Weixuan Fu and Jason H. Moore (2020) and \"Evaluation of a Tree-based Pipeline Optimization Tool for Automating Data Science.\" from Randal S. Olson, Nathan Bartley, Ryan J. Urbanowicz, and Jason H. Moore. ",
"displayName": "TPOTAutoMLClassification",
"gpu": 0,
"inputDescription":"Features used by the model to make predictions. For example: { \n\n“Feature1”: 12, \n\n“Feature2”: 222, \n\n. \n\n. \n\n“FeatureN”: 110 \n\n} ",
"inputType": "JSON",
"memory": 0,
"mlPackageLanguage": "PYTHON36",
"name": "TPOTAutoMLClassification",
"outputDescription":"JSON with predicted class, associated confidence on that class prediction (between 0-1) and label name. Label names are returned only if the label encoding was performed by the pipeline, within AI Fabric. Some scikit-learn models do not support confidence scores. If the output of the optimization pipeline is a scikit-learn model which does not support confidence scores the output will only contain the predicted class. Ex: { \n\n\"predictions\": 0,  \n\n\"confidences\": 0.6, \n\n\"labels\": “yes” \n\n} \n\nOr if label encoding was done outside of the model: { \n\n\"predictions\": 0,  \n\n\"confidences\": 0.6, \n\n}   ", "processorType": "CPU",
"processorType": "CPU",
"projectId": "[project-id]",
"retrainable": true,
"stagingUri": "[staging-uri]",
"projectName": "Tabular Data",
"projectDescription": "Models for analyzing tabular data including classification and regression ML Packages",
"tenantName": "Open-Source Packages",
"imagePath": "registry.replicated.com/aif-core/tpotautomlclassification:1"
}
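Likewise illustrative (placeholder endpoint; the feature names are the example ones from inputDescription), a request against a trained skill from this package would carry the feature map and return the predictions/confidences/labels shape from outputDescription:

# Hypothetical endpoint; only works after the package has been trained.
curl -X POST "https://<aifabric-host>/<tpot-automl-skill>/predict" \
  -H "Content-Type: application/json" \
  -d '{"Feature1": 12, "Feature2": 222, "FeatureN": 110}'
# {"predictions": 0, "confidences": 0.6, "labels": "yes"}
# or, when label encoding happened outside the pipeline:
# {"predictions": 0, "confidences": 0.6}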
21 changes: 21 additions & 0 deletions metadata/TextSummarization__1__metadata.json
@@ -0,0 +1,21 @@
{
"changeLog": "",
"cpu": 0,
"description": "This is a abstractive text summarization model open sourced by Facebook AI Research. It is a sequence-to-sequence model based on the paper `BART: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension` by Lewis, et al.",
"displayName": "TextSummarization",
"gpu": 0,
"inputDescription": "Text to be summarized as a String. Please note this model can be slow for long inputs.",
"inputType": "JSON",
"memory": 0,
"mlPackageLanguage": "PYTHON36",
"name": "TextSummarization",
"outputDescription": "JSON with summarized text. The resulting output will have about 20-30% the length of the input",
"processorType": "CPU",
"projectId": "[project-id]",
"retrainable": false,
"stagingUri": "[staging-uri]",
"projectName": "Language Comprehension",
"projectDescription": "Models performing cognitively challenging tasks such as text summarization and question answering",
"tenantName": "Open-Source Packages",
"imagePath": "registry.replicated.com/aif-core/textsummarization:1"
}
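Same caveat as above (placeholder endpoint, not a documented API): the request body is just a JSON string, and the response carries a summary at roughly 20-30% of the input length:

# Hypothetical endpoint; expect slow responses for long inputs, per inputDescription.
curl -X POST "https://<aifabric-host>/<text-summarization-skill>/predict" \
  -H "Content-Type: application/json" \
  -d '"<several paragraphs of source text to be summarized>"'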
21 changes: 21 additions & 0 deletions metadata/UiPathDocumentOCR__1__metadata.json
@@ -0,0 +1,21 @@
{
"changeLog": "Release v2020.10",
"cpu": 0,
"description": "Machine Learning model for extracting text from Documents. Please see more details including supported languages and link to Activities guide in the About Licensing -> Document Understanding API Key section of the UiPath Automation Cloud Guide here: https://docs.uipath.com/automation-cloud/docs/about-licensing#section-document-understanding-api-key.",
"displayName": "UiPathDocumentOCR",
"gpu": 0,
"inputDescription": "ML Skills deployed using this ML Package can be integrated into RPA workflows using the UiPath Document OCR activity from OCR.Activities pack in the Official feed. File formats accepted include pdf, tiff, jpg or png files. In non-airgapped deployments, the activity requires the Document Understanding API Key input which you need to obtain from your UiPath Automation Cloud account, in the Licenses -> Other Services view.",
"inputType": "JSON",
"memory": 0,
"mlPackageLanguage": "PYTHON37_DU",
"name": "UiPathDocumentOCR",
"outputDescription": "Please refer to the documentation of the Activity used to query the ML Skill.",
"processorType": "GPU",
"projectId": "[project-id]",
"retrainable": false,
"stagingUri": "[staging-uri]",
"projectName": "UiPath Document Understanding",
"projectDescription": "UiPath models to classify and extract information from images and pdfs.",
"tenantName": "UiPath",
"imagePath": "registry.replicated.com/aif-core/uipathdocumentocr:1"
}
