Skip to main content

Enabling and Using Metrics Functionality

On this page, we will enable and use the Opentelemetry Metrics functionality. Once again, we will use the UIP VSCode Plugin to test our changes, for now.

It is assumed that the Opentelemetry Collector is properly configured to accept metric data and expose it as a Prometheus endpoint.

Step 1 - Enabling Metrics

Go ahead and open configurations.yml and edit the properties block as shown:

configurations.yml
properties:
  agent:
    log_level: Info
    netname: UIP-DBG-01
  otel:
    enable_tracing: true
    export_metrics: true
    trace_endpoint: http://192.168.56.11:4318
    metrics_endpoint: http://192.168.56.11:4318
    service_name: vscode-uip-debugger
    uip_service_name: uip/${extension_name}
api:
  extension_start:
    - name: es1
      log_level: Inherited
      runtime_dir: /home/shrey/dev/extensions/test/OtelDemoTest
      fields:
        src_folder: /tmp/test_src
        dst_folder: /tmp/test_dst
        file_type:
          - txt

The export_metrics property was set to true and the metrics_endpoint was changed to the Opentelemetry Collector URL (it will need to be changed according to your setup).

tip

Similar properties exist in uags.conf and omss.conf that can be used to enable metrics export in the Agent. See OTEL_EXPORT_METRICS - UAG configuration option and OTEL_EXPORT_METRICS - OMS configuration option.

Step 2 - Adding Custom Metrics

Let's add some custom metrics. Go ahead and update extension.py as follows:

extension.py
from __future__ import print_function
from universal_extension import UniversalExtension
from universal_extension import ExtensionResult
from universal_extension import ui
from universal_extension import logger

from universal_extension import utility
from universal_extension import otel

import time
import shutil
import os
import random
import json

if otel.is_compatible:
from opentelemetry import trace
from opentelemetry import metrics
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.view import (
ExplicitBucketHistogramAggregation,
View
)
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
OTLPMetricExporter,
)

class Extension(UniversalExtension):
    """Demo extension that copies files between folders while emitting
    Opentelemetry traces and custom metrics (a file counter and a
    transfer-duration histogram).
    """

    def __init__(self):
        """Initializes an instance of the 'Extension' class"""
        # Call the base class initializer
        super(Extension, self).__init__()
        # Cooperative cancellation flag; set by extension_cancel() and
        # checked at the top of the transfer loop in extension_start().
        self.stop = False

        self.setup_tracer()
        self.setup_metrics()

    def setup_tracer(self):
        """Bind self.tracer to a real Opentelemetry tracer, or to a
        no-op stand-in when the runtime is not otel-compatible.
        """
        if otel.is_compatible:
            self.tracer = trace.get_tracer(__name__)
        else:
            # utility.NoOp absorbs any call made on it, so the rest of
            # the code can use self.tracer unconditionally.
            self.tracer = utility.NoOp()

    def setup_metrics(self):
        """Bind self.meter (real meter or NoOp) and create the two
        custom instruments used by transfer_file():
        a counter for files transferred and a histogram for durations.
        """
        if otel.is_compatible:
            self.meter = metrics.get_meter(__name__)
        else:
            # NoOp meter: create_counter/create_histogram below still
            # "work" but record nothing.
            self.meter = utility.NoOp()

        self.num_files_transferred_cntr = self.meter.create_counter(
            name="num.files.transferred",
            description="Number of files transferred",
        )
        self.file_transfer_duration = self.meter.create_histogram(
            name="file.transfer.duration",
            description="How long the file took to transfer",
            unit="s",
        )

    @classmethod
    def extension_new(cls, fields):
        """API-level-1.5.0 hook that lets the extension customize
        provider initialization before instruments are created.

        Returns an ExtensionConfig whose MeterProvider (when otel is
        available) overrides the histogram bucket boundaries for
        file.transfer.duration and shortens the export interval from
        the 60 s default to 1 s.
        """
        if not otel.is_compatible:
            # No otel support: fall back to the default configuration.
            return cls.ExtensionConfig()

        return cls.ExtensionConfig(
            meter_provider=MeterProvider(
                views=[
                    View(
                        instrument_name="file.transfer.duration",
                        # Custom bucket boundaries (seconds); the SDK
                        # defaults are too coarse for 0-2 s transfers.
                        aggregation=ExplicitBucketHistogramAggregation(
                            (0, 0.5, 1, 1.5, 2, 10)
                        ),
                    )
                ],
                metric_readers=[
                    PeriodicExportingMetricReader(
                        # Export every second instead of every 60 s so
                        # short debug runs still produce data points.
                        OTLPMetricExporter(), export_interval_millis=1000
                    )
                ],
            )
        )

    def transfer_file(self, src_path, dst_path, span):
        """Copy src_path into dst_path, annotating the given span and
        updating both custom metrics.

        Returns True on success, False when the file already exists in
        the destination. Raises FileNotFoundError when dst_path does
        not exist.
        """
        start_time = time.time()

        span.set_attributes({"src_file": src_path, "dst_folder": dst_path})

        # Ensure destination directory exists
        if not os.path.exists(dst_path):
            raise FileNotFoundError(
                "Destination directory ({0}) does not exist".format(dst_path)
            )

        # Ensure the source file is not already present in the destination
        # directory (unless overwrite is selected)
        if os.path.exists(os.path.join(dst_path, os.path.basename(src_path))):
            logger.info(
                "'{0}' already exists in '{1}'".format(
                    os.path.basename(src_path), dst_path
                )
            )
            if otel.is_compatible:
                # Mark the span as errored so the skip is visible in
                # the trace backend.
                span.set_status(
                    trace.Status(
                        status_code=trace.StatusCode.ERROR,
                        description="'{0}' already exists in '{1}'".format(
                            os.path.basename(src_path), dst_path
                        ),
                    )
                )
            return False

        shutil.copy(src_path, dst_path)
        # Artificial delay so the demo histogram shows a spread of
        # durations across the configured buckets.
        time.sleep(random.uniform(0, 2))

        # Counter is labeled by file extension (e.g. ".txt").
        self.num_files_transferred_cntr.add(
            1, {"file_type": os.path.splitext(src_path)[1]}
        )
        # Duration includes the copy plus the artificial delay above.
        self.file_transfer_duration.record(time.time() - start_time)

        return True

    def extension_start(self, fields):
        """Required method that serves as the starting point for work performed
        for a task instance.

        Parameters
        ----------
        fields : dict
            populated with field values from the associated task instance
            launched in the Controller

        Returns
        -------
        ExtensionResult
            once the work is done, an instance of ExtensionResult must be
            returned. See the documentation for a full list of parameters that
            can be passed to the ExtensionResult class constructor
        """

        files_transferred = []
        src = fields["src_folder"]
        dst = fields["dst_folder"]

        # Normalize the requested extensions to lowercase ".ext" form so
        # they compare equal to os.path.splitext() output.
        file_types = [
            ft.lower() if ft.startswith(".") else "." + ft.lower()
            for ft in fields["file_type"]
        ]

        if not os.path.exists(src):
            raise FileNotFoundError("'{0}' does not exist".format(src))

        all_file_list = os.listdir(src)

        # filter the files
        file_list = []
        for f in all_file_list:
            file_path = os.path.join(src, f)
            file_type = os.path.splitext(file_path)[1]
            if os.path.isfile(file_path) and file_type in file_types:
                file_list.append(file_path)

        logger.info(
            "Found {0} files that can be transferred".format(len(file_list))
        )

        for f in file_list:
            # Honor a cancellation request between files.
            if self.stop:
                break

            # Use a real span context when otel is available, otherwise a
            # no-op context manager so the 'with' below works either way.
            span_ctx = (
                utility.noop_context()
                if not otel.is_compatible
                else self.tracer.start_as_current_span("transferring file")
            )

            with span_ctx as span:
                if self.transfer_file(f, dst, span):
                    files_transferred.append(f)
                    # Progress is the percentage of the filtered file
                    # list transferred so far.
                    ui.update_progress(
                        int(len(files_transferred) / len(file_list) * 100)
                    )
                    logger.info("Transferred '{0}' to '{1}'".format(f, dst))

        return ExtensionResult(
            # rc 0 only when every eligible file was transferred.
            rc=0 if len(file_list) - len(files_transferred) == 0 else 1,
            unv_output="The following files were transferred: \n {0}".format(
                json.dumps(files_transferred)
            ),
            message="{0} files found and {1} files transferred".format(
                len(file_list), len(files_transferred)
            ),
        )

    def extension_cancel(self):
        """Optional hook invoked on task cancel; requests a graceful stop
        of the transfer loop (checked once per file).
        """
        self.stop = True
  • Lines 18-27 import all the necessary metrics-related modules from the Opentelemetry library.
  • Lines 46-60 create a method called setup_metrics() which sets up the meter and uses it to create two metrics. The first metric will be used to count the number of files transferred, and the second to capture the transfer duration.
  • Line 62-83 implements the new extension_new() method introduced in API Level 1.5.0. It allows developers to control the initialization of the MeterProvider and TracerProvider (see UniversalExtension Class (1.5.0) for details). Within extension_new(), we customize the MeterProvider to change the bucket boundaries to the transfer duration histogram; the default boundaries are not suitable for our data. Additionally, we modify the export interval from the default 60 seconds to 1 second.
  • Lines 85-123 update the metrics:
    • Line 86 captures the start time at the beginning of the method
    • Lines 118-120 update the num.files.transferred counter.
    • Line 121 records how long it took to transfer the file.

Step 3 - Verifying Metrics

Now, let's verify our changes. Go ahead and delete all the files inside /tmp/test_dst. Once deleted, press F5 to start the debugging session. Upon completion, navigate to the Prometheus endpoint (http://192.168.56.11:8000/metrics in my case - this will vary) and you should see:

Metrics

# HELP file_transfer_duration_seconds How long the file took to transfer

# TYPE file_transfer_duration_seconds histogram
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="0"} 0
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="0.5"} 1
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="1"} 3
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="1.5"} 3
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="2"} 4
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="10"} 4
file_transfer_duration_seconds_bucket{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1",le="+Inf"} 4
file_transfer_duration_seconds_sum{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 3.421269655227661
file_transfer_duration_seconds_count{agent_id="UIP-DBG-01",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 4

# HELP num_files_transferred_total Number of files transferred

# TYPE num_files_transferred_total counter
num_files_transferred_total{agent_id="UIP-DBG-01",file_type=".json",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 1
num_files_transferred_total{agent_id="UIP-DBG-01",file_type=".txt",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 2
num_files_transferred_total{agent_id="UIP-DBG-01",file_type=".zip",instance="sa-u18:5678",job="Stonebranch.UAC/uip/OtelDemo",security_business_services="",task_name="es1"} 1

As you can see, a total of 4 files were transferred: 2 of them were .txt, 1 was .json, and the other was .zip. Additionally, we can see the transfer duration. Tools like Grafana can be used to visualize the metrics in a meaningful manner.