Initial code commit

2025-05-12 19:22:55 -05:00 · 2025-05-12 19:22:55 -05:00 · b3382d880b
commit b3382d880b
parent 5b71cecab6
37 changed files with 1778 additions and 1 deletions
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -0,0 +1,358 @@
+# Workflow with three jobs: build (package/publish the chart), deploy, and restore (only after a backup was taken)
+name: Deploy the Helm Chart
+
+# Runs on every push to `main`, or when triggered manually (`workflow_dispatch`)
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  # "Build" from the source code: package the chart and publish it to the OCI registry
+  # The chart name read from Chart.yaml is exposed as the `chart-name` job output
+  build:
+    runs-on: self-hosted
+    outputs:
+      chart-name: ${{ steps.update-helm-repo.outputs.CHART_NAME }}
+    steps:
+    # Get the source code from the repository
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    # "Build"/package the source code into the appropriate format (Helm chart etc...)
+    # If the local chart version has already been published, the patch version is bumped and committed first
+    - name: Update Helm Repository (if needed)
+      id: update-helm-repo
+      run: |
+        # Parse the chart name from the Chart.yaml
+        CHART_NAME=$(yq '.name' Chart.yaml)
+        CHART_NAME=${CHART_NAME#\"}  # Remove leading quote
+        CHART_NAME=${CHART_NAME%\"}  # Remove trailing quote
+        echo "CHART_NAME=$CHART_NAME" >> "$GITHUB_OUTPUT"
+
+        echo "Chart Name: $CHART_NAME"
+
+        # Note, this depends on the [Harbor Helm Index](https://github.com/AlanBridgeman/harbor-helm-index) pretty heavily
+        # In particular, that tool allows us to treat what is an OCI registry as a Helm repository (which includes using `helm search repo`)
+        helm repo add BridgemanAccessible https://helm.bridgemanaccessible.ca
+
+        # Collect every published version of this exact chart, one bare version per line
+        #   - `--versions` is needed because `helm search repo` otherwise lists only the latest version of each chart
+        #   - the jq `select` keeps only an exact name match (the search itself also returns charts whose name merely contains ours)
+        #   - `jq -r` prints raw strings, so no quote stripping is needed before comparing
+        # A failed or empty search is treated as "chart not published yet"
+        REMOTE_VERSIONS_RAW=$(helm search repo "BridgemanAccessible/$CHART_NAME" --versions --output json 2>/dev/null | jq -r --arg name "BridgemanAccessible/$CHART_NAME" '.[]? | select(.name == $name) | .version' || true)
+
+        REMOTE_VERSIONS=()
+        if [ -n "$REMOTE_VERSIONS_RAW" ]; then
+          # The chart is already in the repository, so we need to check if the version is the same or not
+          echo "Chart already exists in the repository. Checking version..."
+
+          mapfile -t REMOTE_VERSIONS <<< "$REMOTE_VERSIONS_RAW"
+
+          echo "Remote Chart Versions: ${REMOTE_VERSIONS[*]}"
+        else
+          # The chart is not in the repository, so we'll need to add it
+          # (REMOTE_VERSIONS stays empty so that nothing can match the local version)
+          echo "Chart not found in the repository. Adding it..."
+        fi
+
+        # Just to keep things clean/safe/etc... remove the repo
+        helm repo remove BridgemanAccessible
+
+        # Get the local version from the Chart.yaml file
+        LOCAL_VERSION=$(yq '.version' Chart.yaml)
+        LOCAL_VERSION=${LOCAL_VERSION#\"}  # Remove leading quote
+        LOCAL_VERSION=${LOCAL_VERSION%\"}  # Remove trailing quote
+        echo "Local Chart Version: $LOCAL_VERSION"
+
+        # Loop through the remote versions and check if any of them match the local version
+        has_match='false'
+        for REMOTE_VERSION in "${REMOTE_VERSIONS[@]}"; do
+          if [ "$REMOTE_VERSION" == "$LOCAL_VERSION" ]; then
+            echo "Remote version matches local version!"
+            has_match='true'
+            break
+          fi
+        done
+
+        # If the versions match, we want to increment the chart's patch version
+        # (compare the variable's VALUE; a bare non-empty word inside `[ ]` is always true)
+        if [ "$has_match" == "true" ]; then
+          echo "Versions match!"
+
+          # Increment the patch version of the local version (Ex. 1.0.0 -> 1.0.1)
+          IFS='.' read -r major minor patch <<< "$LOCAL_VERSION"
+          patch=$((patch + 1))
+          NEW_LOCAL_VERSION="$major.$minor.$patch"
+
+          echo "New Local Version: $NEW_LOCAL_VERSION"
+          echo "Committing new chart version change..."
+
+          sed -i "s|version: \"$LOCAL_VERSION\"|version: \"$NEW_LOCAL_VERSION\"|g" Chart.yaml
+
+          # The substitution above only matches a double-quoted version; stop with a clear message if nothing changed
+          if git diff --quiet -- Chart.yaml; then
+            echo "Error: Chart.yaml was not changed (expected a line of the form: version: \"$LOCAL_VERSION\")"
+            exit 1
+          fi
+
+          LOCAL_VERSION=$NEW_LOCAL_VERSION
+
+          # Update remote URL to use the GITHUB_TOKEN for authentication
+          git remote set-url origin "https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git"
+
+          # Setup git user details for committing the version change
+          git config user.name "GitHub Actions"
+          git config user.email "actions@github.com"
+
+          # Commit the version change to the `Chart.yaml` file
+          git add Chart.yaml
+          git commit -m "[Github Actions] Update Helm chart version to $(yq -r '.version' Chart.yaml)"
+
+          # Push the changes to the repository
+          git push origin HEAD:main
+        else
+          echo "Versions do not match!"
+        fi
+
+        # Package the chart and publish the resulting archive to the OCI registry
+        helm package .
+        helm push "./$CHART_NAME-$LOCAL_VERSION.tgz" "oci://${{ secrets.REPOSITORY_HOSTNAME }}/helm"
+  
+  # Deploy to our environment (staging cluster)
+  # Job outputs: `backup-created` (set by the backup step) and `restore-time` (set by the deploy step)
+  deploy:
+    runs-on: self-hosted
+    needs: build
+    outputs:
+      backup-created: ${{ steps.backup.outputs.created }}
+      restore-time: ${{ steps.deploy-helm-chart.outputs.RESTORE_TIME }}
+    env:
+      # Namespace and release the steps below inspect, back up, remove and (re-)deploy
+      # NOTE(review): the restore job looks pods up by namespace as well - confirm both jobs use the same value
+      NAMESPACE: ciam-service-dashboard
+      RELEASE_NAME: services
+      # Chart name as published by the build job
+      CHART_NAME: ${{ needs.build.outputs.chart-name }}
+    steps:
+    # Detect whether the release is already installed
+    # The result gates the backup (and, through it, the takedown) that happens before deploying the new version
+    - name: Already Deployed Check
+      id: already-deployed-check
+      run: |
+        # Assume "not deployed" and flip both values when `helm status` succeeds for the release
+        ALREADY_DEPLOYED=false
+        STATUS_MESSAGE="Not deployed"
+        if helm status -n "$NAMESPACE" "$RELEASE_NAME"; then
+          ALREADY_DEPLOYED=true
+          STATUS_MESSAGE="Already deployed"
+        fi
+
+        echo "$STATUS_MESSAGE"
+        echo "already-deployed=$ALREADY_DEPLOYED" >> "$GITHUB_OUTPUT"
+    
+    # Take a backup of the current state of the resources
+    # Sets the `created=true` step output on success; any failure stops the job before anything is removed
+    - name: Backup Data
+      id: backup
+      if: steps.already-deployed-check.outputs.already-deployed == 'true'
+      run: |
+        # -- Setup Backup Sidecar metadata variables --
+
+        # Variable for the image name (so that there can't be typos etc...)
+        BACKUP_SIDECAR_IMAGE_NAME="backup-sidecar"
+
+        # Get the available tags from the image registry (`@sh` yields one line of space separated, single-quoted tags)
+        IFS=' ' read -r -a BACKUP_SIDECAR_TAGS <<< "$(skopeo list-tags "docker://${{ secrets.REPOSITORY_HOSTNAME }}/k8s/$BACKUP_SIDECAR_IMAGE_NAME" | jq -r '.Tags | @sh')"
+
+        # Get the latest tag from the list of tags (highest according to a version sort)
+        LATEST_BACKUP_SIDECAR_TAG=$(printf '%s\n' "${BACKUP_SIDECAR_TAGS[@]}" | sort -V | tail -n 1)
+        LATEST_BACKUP_SIDECAR_TAG=${LATEST_BACKUP_SIDECAR_TAG#\'}  # Remove leading quote
+        LATEST_BACKUP_SIDECAR_TAG=${LATEST_BACKUP_SIDECAR_TAG%\'}  # Remove trailing quote
+        echo "Latest Backup Sidecar Tag: $LATEST_BACKUP_SIDECAR_TAG"
+
+        # Get/Generate the "full" image name (including the tag) for the backup sidecar
+        FULL_BACKUP_SIDECAR_IMAGE_NAME="${{ secrets.REPOSITORY_HOSTNAME }}/k8s/$BACKUP_SIDECAR_IMAGE_NAME:$LATEST_BACKUP_SIDECAR_TAG"
+        echo "Looking for backup sidecar: $FULL_BACKUP_SIDECAR_IMAGE_NAME"
+
+        # -- END: Setup Backup Sidecar metadata variables --
+
+        # -- Create a backup --
+
+        # Get the name of the running main pod (the one that has the backup sidecar container in it)
+        # `any(...)` lists each pod once even if several of its containers match; `head -n 1` keeps a single name
+        RUNNING_MAIN_POD=$(kubectl -n "$NAMESPACE" get pods -o json | jq -r --arg img "$FULL_BACKUP_SIDECAR_IMAGE_NAME" '.items[] | select(any(.spec.containers[]; .image == $img)) | .metadata.name' | head -n 1)
+        if [ -z "$RUNNING_MAIN_POD" ]; then
+          # Without this guard `kubectl exec` would be called with no pod name and fail with a confusing error
+          echo "Error: No pod using image $FULL_BACKUP_SIDECAR_IMAGE_NAME was found in namespace $NAMESPACE."
+          exit 1
+        fi
+        echo "Running Main Pod: $RUNNING_MAIN_POD"
+
+        # Make the request to create the backups (sent from inside the pod to the service listening on localhost:4000)
+        CREATE_BACKUP_OUTPUT=$(kubectl -n "$NAMESPACE" exec "$RUNNING_MAIN_POD" -c "$RELEASE_NAME" -- /bin/bash -c "curl -X PUT -sSL http://localhost:4000/backup -o backup-output.txt && cat backup-output.txt")
+        echo "Create Backup Output: $CREATE_BACKUP_OUTPUT"
+
+        # Parse the output to check if the backup was created successfully
+        BACKUP_OUTPUT_MESSAGE=$(jq -r '.message' <<< "$CREATE_BACKUP_OUTPUT")
+        if [ "$BACKUP_OUTPUT_MESSAGE" == "Backup created successfully" ]; then
+          echo "Backup created successfully."
+          echo "created=true" >> "$GITHUB_OUTPUT"
+        else
+          echo "Error creating backup: $BACKUP_OUTPUT_MESSAGE"
+          exit 1
+        fi
+
+        # -- END: Create a backup --
+    
+    # A backup of the current state exists at this point and starting fresh is easier,
+    # so all the existing resources (including the Helm chart) are deleted before the new one is (re-)deployed
+    - name: Remove old resources
+      if: steps.backup.outputs.created == 'true'
+      run: |
+        # `k8s-delete` is a custom script (from "Helpful Scripts") that uninstalls the Helm chart and deletes any associated resources
+        k8s-delete --namespace "$NAMESPACE" --release-name "$RELEASE_NAME" --rwx-volumes vault-role-vars
+    
+    # Deploy the resources to the cluster
+    # Sets the `RESTORE_TIME` step output when a backup was taken earlier in this job
+    - name: Deploy Helm Chart
+      id: deploy-helm-chart
+      run: |
+        FILLED_VALUES_FILE="values.filled.yaml"
+
+        # Download a filled version of the `values.yaml` file from a secure location
+        #   - `--fail` (-f) makes curl exit non-zero on an HTTP error instead of saving the error page as the values file
+        #   - the URL is quoted so the `?` in it can never be treated as a shell glob character
+        if ! curl -fsSL "https://secure-storage.bridgemanaccessible.ca/services-dashboard/filled-values?token=${{ secrets.SECURE_STORAGE_TOKEN }}" -o "$FILLED_VALUES_FILE"; then
+          echo "Error: Failed to download filled values file."
+          exit 1
+        fi
+
+        # Only when the backup step reported success (it writes `created=true`)
+        if [ "${{ steps.backup.outputs.created }}" == "true" ]; then
+          CURR_DATETIME=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+
+          echo "Setting restoreFromBackup to: $CURR_DATETIME"
+
+          # If the backup was created successfully, fill the empty `restoreFromBackup` value in the filled values file with the current UTC timestamp
+          sed -i "s|restoreFromBackup: \"\"|restoreFromBackup: \"$CURR_DATETIME\"|g" "$FILLED_VALUES_FILE"
+
+          echo "RESTORE_TIME=$CURR_DATETIME" >> "$GITHUB_OUTPUT"
+        fi
+
+        # The chart name comes from the build job's output (exposed through this job's `env`)
+        CHART_NAME="${{ env.CHART_NAME }}"
+        echo "Chart Name: $CHART_NAME"
+
+        # Can run `k8s-deploy --help` if you want to see all the options available
+        k8s-deploy \
+          --namespace "$NAMESPACE" \
+          --release-name "$RELEASE_NAME" \
+          --filled-values-file "$FILLED_VALUES_FILE" \
+          --chart-name "$CHART_NAME" \
+          --rwx-volumes vault-role-vars
+  
+  # Once deployed, we want to restore it to its previous state (if applicable)
+  # Only runs when the deploy job reports that a backup was created
+  restore:
+    runs-on: self-hosted
+    needs: deploy
+    if: needs.deploy.outputs.backup-created == 'true'
+    env:
+      # Has to be the namespace the deploy job installs the release into, otherwise the pod lookup in the steps below finds nothing
+      # NOTE(review): aligned with the deploy job's value; if the plural `ciam-services-dashboard` is the intended name, change it in BOTH jobs
+      NAMESPACE: ciam-service-dashboard
+      # Container the restore requests are executed in
+      CONTAINER_NAME: services
+      # Timestamp written by the deploy job when it filled in `restoreFromBackup`
+      RESTORE_TIME: ${{ needs.deploy.outputs.restore-time }}
+      # Public hostname polled to verify the site is back up
+      SITE_HOSTNAME: services.bridgemanaccessible.ca
+    steps:
+    # Restore the data from the backup
+    # Waits for the freshly deployed pod to be running, then asks it to restore the latest backup
+    - name: Restore data
+      run: |
+        # -- Setup Backup Sidecar metadata variables --
+
+        # Variable for the image name (so that there can't be typos etc...)
+        BACKUP_SIDECAR_IMAGE_NAME="backup-sidecar"
+
+        # Get the available tags from the image registry (`@sh` yields one line of space separated, single-quoted tags)
+        IFS=' ' read -r -a BACKUP_SIDECAR_TAGS <<< "$(skopeo list-tags "docker://${{ secrets.REPOSITORY_HOSTNAME }}/k8s/$BACKUP_SIDECAR_IMAGE_NAME" | jq -r '.Tags | @sh')"
+
+        # Get the latest tag from the list of tags (highest according to a version sort)
+        LATEST_BACKUP_SIDECAR_TAG=$(printf '%s\n' "${BACKUP_SIDECAR_TAGS[@]}" | sort -V | tail -n 1)
+        LATEST_BACKUP_SIDECAR_TAG=${LATEST_BACKUP_SIDECAR_TAG#\'}  # Remove leading quote
+        LATEST_BACKUP_SIDECAR_TAG=${LATEST_BACKUP_SIDECAR_TAG%\'}  # Remove trailing quote
+        echo "Latest Backup Sidecar Tag: $LATEST_BACKUP_SIDECAR_TAG"
+
+        # Get/Generate the "full" image name (including the tag) for the backup sidecar
+        FULL_BACKUP_SIDECAR_IMAGE_NAME="${{ secrets.REPOSITORY_HOSTNAME }}/k8s/$BACKUP_SIDECAR_IMAGE_NAME:$LATEST_BACKUP_SIDECAR_TAG"
+        echo "Looking for backup sidecar: $FULL_BACKUP_SIDECAR_IMAGE_NAME"
+
+        # -- END: Setup Backup Sidecar metadata variables --
+
+        # Print the name of a pod that has the backup sidecar container in it (nothing if there is none)
+        # An optional first argument names a pod to skip (used when the previous pod has failed)
+        find_main_pod() {
+          kubectl -n "$NAMESPACE" get pods -o json | jq -r --arg img "$FULL_BACKUP_SIDECAR_IMAGE_NAME" --arg prev "${1:-}" '.items[] | select(any(.spec.containers[]; .image == $img) and .metadata.name != $prev) | .metadata.name' | head -n 1
+        }
+
+        # Print the phase of the named pod; prints nothing when no pod name is given
+        get_pod_status() {
+          if [ -n "$1" ]; then
+            kubectl -n "$NAMESPACE" get pod "$1" -o json | jq -r '.status.phase'
+          fi
+        }
+
+        # Get the name of the running main pod (the one that has the backup sidecar container in it)
+        RUNNING_MAIN_POD=$(find_main_pod)
+        echo "Running Main Pod: $RUNNING_MAIN_POD"
+
+        # Variables for retry logic
+        MAX_RETRIES=5
+        RETRY_INTERVAL=30
+        RETRY_COUNT=0
+
+        # Because Kubernetes (or at least our staging cluster) can be a bit temperamental,
+        # We allow for "resetting" a few times.
+        # By resetting, we mean re-detecting the main running pod etc...
+        MAX_RESETS=3
+        RESET_COUNT=0
+
+        POD_STATUS=$(get_pod_status "$RUNNING_MAIN_POD")
+        echo "Pod Status: $POD_STATUS"
+
+        # Retry (wait) a few times if the pod isn't running yet
+        while [ "$POD_STATUS" != "Running" ] && [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ]; do
+          echo "Pod is not running yet (Current Status: $POD_STATUS). Waiting for $RETRY_INTERVAL seconds (attempt $((RETRY_COUNT + 1))/$MAX_RETRIES)..."
+          sleep "$RETRY_INTERVAL"
+
+          # If no pod was found so far (e.g. it had not been created yet), look for it again
+          if [ -z "$RUNNING_MAIN_POD" ]; then
+            RUNNING_MAIN_POD=$(find_main_pod)
+            echo "Running Main Pod: $RUNNING_MAIN_POD"
+          fi
+
+          # Get the current status of the pod (in this job's namespace, the same one the pod was looked up in)
+          POD_STATUS=$(get_pod_status "$RUNNING_MAIN_POD")
+
+          # Increment the retry count
+          RETRY_COUNT=$((RETRY_COUNT + 1))
+
+          # If the pod is in a failed state, we want to "reset"
+          # Though, we only want to reset a few times (to avoid infinite loops if something else is wrong etc...)
+          if [[ "$POD_STATUS" == "Failed" ]] && [ "$RESET_COUNT" -lt "$MAX_RESETS" ]; then
+            echo "Pod is in a failed state. Resetting..."
+
+            # Reset the pod (pick a different one than the failed pod) and increment the reset count
+            RUNNING_MAIN_POD=$(find_main_pod "$RUNNING_MAIN_POD")
+            echo "Running Main Pod Reset: $RUNNING_MAIN_POD"
+
+            # Reset the retry count
+            RETRY_COUNT=0
+            echo "Retry Count Reset"
+
+            RESET_COUNT=$((RESET_COUNT + 1))
+          fi
+        done
+
+        if [ "$POD_STATUS" != "Running" ]; then
+          echo "Error: Pod did not start running after $((RETRY_COUNT * RETRY_INTERVAL)) seconds."
+          exit 1
+        fi
+
+        # Ask the service inside the pod (localhost:4000) which backups are the latest
+        LATEST_BACKUPS=$(kubectl -n "$NAMESPACE" exec "$RUNNING_MAIN_POD" -c "$CONTAINER_NAME" -- /bin/bash -c "curl -sSL http://localhost:4000/backups/latest -o latest-backup.json && cat latest-backup.json")
+        echo "Latest Backups: $LATEST_BACKUPS"
+
+        LATEST_DB_BACKUP=$(jq -r '.backups.db' <<< "$LATEST_BACKUPS")
+        echo "Latest DB Backup: $LATEST_DB_BACKUP"
+
+        LATEST_VAULT_BACKUP=$(jq -r '.backups.vault' <<< "$LATEST_BACKUPS")
+        echo "Latest Vault Backup: $LATEST_VAULT_BACKUP"
+
+        # Request the restore, passing along the restore time recorded by the deploy job
+        echo "Restoring Vault Backup: $LATEST_VAULT_BACKUP at restore time: ${{ env.RESTORE_TIME }}"
+        restore_output=$(kubectl -n "$NAMESPACE" exec "$RUNNING_MAIN_POD" -c "$CONTAINER_NAME" -- /bin/bash -c "curl -s -X POST -H 'Content-Type: application/json' -d '{ \"restoreTime\": \"${{ env.RESTORE_TIME }}\" }' http://localhost:4000/restore/latest -o restore-output.txt && cat restore-output.txt")
+        echo "Restore Output: $restore_output"
+    
+    # Verify that the site starts up after the restore
+    # Polls the site until it answers with HTTP 200, failing the job if it never does
+    - name: Verify Restore
+      run: |
+        # Print the final HTTP status code of the site (after following redirects)
+        # When the connection itself fails curl reports 000 and exits non-zero; `|| true` keeps that exit code
+        # from aborting the step (the runner's default bash runs with `-e`) so that the retry loop can do its job
+        get_site_status_code() {
+          curl -sSL "https://$SITE_HOSTNAME" -w '%{http_code}' -o /dev/null || true
+        }
+
+        SITE_REQUEST_STATUS_CODE=$(get_site_status_code)
+
+        MAX_RETRIES=5
+        RETRY_INTERVAL=30
+        RETRY_COUNT=0
+
+        # Retry (wait) a few times if the site isn't up yet
+        while [ "$SITE_REQUEST_STATUS_CODE" != "200" ] && [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ]; do
+          echo "Site is not up yet (Current Status Code: $SITE_REQUEST_STATUS_CODE). Waiting for $RETRY_INTERVAL seconds (attempt $((RETRY_COUNT + 1))/$MAX_RETRIES)..."
+          sleep "$RETRY_INTERVAL"
+
+          # Get the current status of the site (same URL as the first check)
+          SITE_REQUEST_STATUS_CODE=$(get_site_status_code)
+
+          # Increment the retry count
+          RETRY_COUNT=$((RETRY_COUNT + 1))
+        done
+
+        if [ "$SITE_REQUEST_STATUS_CODE" != "200" ]; then
+          echo "Error: Site did not start up after $((RETRY_COUNT * RETRY_INTERVAL)) seconds."
+          exit 1
+        fi
+
+        echo "Site is up and running (Status Code: $SITE_REQUEST_STATUS_CODE)."