Setup

library(googleAuthR)
library(googleCloudVertexAIR)

# Restrict OAuth to the single cloud-platform scope required by Vertex AI.
options(googleAuthR.scopes.selected = "https://www.googleapis.com/auth/cloud-platform")

# Authenticate non-interactively with a service-account key; GAR_SERVICE_JSON
# holds the path to the JSON key file (set it in .Renviron — never hard-code
# credentials in the script).
gar_auth_service(json_file = Sys.getenv("GAR_SERVICE_JSON"))

Set global arguments

# Read the target GCP project from the environment rather than hard-coding it.
projectId <- Sys.getenv("GCVA_DEFAULT_PROJECT_ID")
# Set the default region and project once; subsequent gcva_* calls pick these
# up implicitly instead of taking them as arguments each time.
gcva_region_set("us-central1")
## 2024-07-08 12:35:47.531773> Region set to 'us-central1'
gcva_project_set(projectId)
## 2024-07-08 12:35:47.532444> ProjectId set to 'gc-vertex-ai-r'

Get training pipeline

# Fetch an existing TrainingPipeline by resource name, supplied via the
# GCVA_TRAINING_PIPELINE environment variable.
training_pipeline <- gcva_trainingPipeline(
  trainingPipelineName = Sys.getenv("GCVA_TRAINING_PIPELINE")
    )
# Printing the object shows the console URL and the pipeline state
# (expected: PIPELINE_STATE_SUCCEEDED before extracting the model below).
training_pipeline
## ==Google Cloud Vertex AI TrainingPipeline Job==
## console:              https://console.cloud.google.com/vertex-ai/locations/us-central1/training/2601571779788931072?project=442003009360 
## state:                PIPELINE_STATE_SUCCEEDED

Get model from training pipeline

The gcva_model() function will parse the modelName from the trainingPipelineJob object.

# Resolve the Model produced by the training pipeline; gcva_model() parses
# the modelName out of the trainingPipelineJob object passed in.
gcva_model(model = training_pipeline)
## ==Google Cloud Vertex AI Model==
## name:                 projects/442003009360/locations/us-central1/models/2734102788232445952 
## displayName:          model-california-housing-20221113163728 
## createTime:           2022-11-13 21:38:14 
## versionId:            1 
## versionAlias:         default

Create custom container training job

First, 1) create a TrainingPipeline that runs a CustomJob, and then 2) import the resulting artifacts as a Model.

# Define (but do not yet run) a custom container training job. The same
# Artifact Registry image is used both for training (entrypoint train.R)
# and for model serving (entrypoint serve.R).
job <- gcva_custom_container_training_job(
  # GCS bucket used for staging artifacts / job output.
  stagingBucket = "gs://my-bucket-name", 
  displayName = "vertex-r",
  # Image that runs the training code.
  containerUri = "us-central1-docker.pkg.dev/gc-vertex-ai-r/my-docker-repo/vertex-r:latest",
  command = c("Rscript", "train.R"),
  # Command and image for the serving container of the uploaded model.
  modelServingContainerCommand = c("Rscript", "serve.R"),
  modelServingContainerImageUri = "us-central1-docker.pkg.dev/gc-vertex-ai-r/my-docker-repo/vertex-r:latest",
  machineType = "n1-standard-4"
)
# Printing the job shows the CustomJob request payload that will be submitted.
job
## ==Google Cloud Vertex AI Custom Container Training Job================
## {
##   "displayName": "vertex-r",
##   "inputDataConfig": {
##     "datasetId": "",
##     "gcsDestination": {
##       "outputUriPrefix": "gs://my-bucket-name"
##     }
##   },
##   "trainingTaskDefinition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/custom_task_1.0.0.yaml",
##   "trainingTaskInputs": {
##     "workerPoolSpecs": [
##       {
##         "machineSpec": {
##           "machineType": "n1-standard-4"
##         },
##         "containerSpec": {
##           "imageUri": "us-central1-docker.pkg.dev/gc-vertex-ai-r/my-docker-repo/vertex-r:latest",
##           "command": ["Rscript", "train.R"]
##         }
##       }
##     ],
##     "baseOutputDirectory": {
##       "outputUriPrefix": ""
##     }
##   },
##   "modelToUpload": {
##     "containerSpec": {
##       "imageUri": "us-central1-docker.pkg.dev/gc-vertex-ai-r/my-docker-repo/vertex-r:latest",
##       "command": ["Rscript", "serve.R"]
##     }
##   }
## }
## ====================================================================

Execute custom training job

# Look up an existing dataset to attach to the training job.
# NOTE(review): gcva_list_datasets()[1,2] takes row 1, column 2 of the
# listing — presumably the dataset resource name; confirm the column index
# is stable across package versions.
dataset <- gcva_dataset(datasetName = gcva_list_datasets()[1,2])

# Submit the custom training job defined above and register the resulting
# model under modelDisplayName.
# NOTE(review): presumably this call blocks until the job finishes and
# returns the uploaded Model object — confirm against package docs.
model <- gcva_run_job(
  job = job,
  dataset = dataset,
  modelDisplayName = "vertex-r-model",
  machineType = "n1-standard-4"
)
model