Calculate Percentage with Elixir

Introduction

This is part seven of the nine-post series on Processing a Log File with Elixir. If you find this article helpful, please subscribe and share 🚀

Here is our list of steps:

  1. Fetch data from URL
  2. Split each new line into a list item
  3. Split each line into list items
  4. Filter items to only contain the URL and TCP_HIT/TCP_MISS
  5. Find the six-digit video ID from the URL; it should be the first integer in HTTP paths of:
    • "example.com/04C0BF/v2/sources/content-owners/"
    • "example.com/04C0BF/ads/transcodes/"
  6. Group by Video ID
  7. Get Cache Hit and Misses for each Video
  8. Calculate the Cache Hit percentage
  9. Sort by video ID
  10. Print to file

Looking at our list of things to do, the next step is to "Get Cache Hit and Misses for each Video".

Our data is now looking like this:

bash

%{
  [video_id: 406301] => [
    [video_id: 406301, tcp: "TCP_HIT/200"],
    [video_id: 406301, tcp: "TCP_HIT/200"]
  ],
  [video_id: 308171] => [[video_id: 308171, tcp: "TCP_HIT/200"]],
  [video_id: 366084] => [
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, ...],
    [...],
    ...
  ],
  [video_id: 401931] => [[video_id: 401931, tcp: "TCP_HIT/200"]],
  [video_id: 386242] => [[video_id: 386242, tcp: "TCP_HIT/200"]],
  [video_id: 366231] => [[video_id: 366231, tcp: "TCP_HIT/200"]],
  [video_id: 191739] => [[video_id: 191739, tcp: "TCP_HIT/200"]],
  [video_id: 136715] => [[video_id: 136715, ...]],
  [video_id: 106652] => [[...], ...],
  [...] => [...],
  ...
}

Before we can do our calculation, we need to count each video_id's hit and miss totals to look like %{:video_id => "1", "HIT" => 3, "MISS" => 0}. The test looks like this:

access-log-app.text.ex

defmodule AccessLogAppTest do
  ...
  # The fixture mirrors the grouped data: every key is a one-pair keyword list
  # ([video_id: id]) and every value is the list of rows recorded for that video.
  # get_cache_hit_misses/1 pattern matches the key as [{_, video_id}], so a key
  # holding more than one pair would raise a MatchError.
  test "Get cache hit/misses for each video_id" do
    list = %{
      [video_id: "1"] => [
        [video_id: "1", tcp: "TCP_HIT/200"],
        [video_id: "1", tcp: "TCP_HIT/200"],
        [video_id: "1", tcp: "TCP_HIT/200"]
      ],
      [video_id: "2"] => [
        [video_id: "2", tcp: "TCP_HIT/206"],
        [video_id: "2", tcp: "TCP_HIT/206"],
        [video_id: "2", tcp: "TCP_HIT/206"]
      ],
      [video_id: "3"] => [
        [video_id: "3", tcp: "TCP_MISS/206"],
        [video_id: "3", tcp: "TCP_MISS/206"],
        [video_id: "3", tcp: "TCP_MISS/206"]
      ],
      [video_id: "4"] => [
        [video_id: "4", tcp: "TCP_HIT/206"],
        [video_id: "4", tcp: "TCP_HIT/206"],
        [video_id: "4", tcp: "TCP_HIT/206"]
      ],
      # Mixed case: one miss and two hits for the same video.
      [video_id: "5"] => [
        [video_id: "5", tcp: "TCP_MISS/206"],
        [video_id: "5", tcp: "TCP_HIT/206"],
        [video_id: "5", tcp: "TCP_HIT/206"]
      ]
    }

    result = get_cache_hit_misses(list)

    assert result == [
             %{:video_id => "1", "HIT" => 3, "MISS" => 0},
             %{:video_id => "2", "HIT" => 3, "MISS" => 0},
             %{:video_id => "3", "HIT" => 0, "MISS" => 3},
             %{:video_id => "4", "HIT" => 3, "MISS" => 0},
             %{:video_id => "5", "HIT" => 2, "MISS" => 1}
           ]
  end
  ...
end

Solution

We start with Enum.map/2 to iterate over the grouped data. Because the data is a map, each item is passed to the function as a {key, value} tuple, for example {[video_id: 136715], [[video_id: 136715, tcp: "TCP_HIT/200"]]}. We start by getting the video_id by pattern matching it:

access-log-app.ex

# Step 1: walk the grouped data and pull the video id out of each entry.
def get_cache_hit_misses(list) do
  Enum.map(list, fn item ->
    # item is a {key, rows} tuple. The pattern requires key to be a list holding
    # exactly one pair and binds that pair's second element to video_id.
    {[{_, video_id}], _} = item
  end)
end

Next, to get a list of the TCP_HIT/MISS values of each video, we take the second element of the tuple (the list of rows for that video) with elem/2. We then use another Enum.map/2 to get just the tcp: value. Finally, we use String.trim/2, String.split/2 and a case statement to get the value of either "HIT" or "MISS".

access-log-app.ex

# Step 2: reduce every row of a video to the bare string "HIT" or "MISS".
def get_cache_hit_misses(list) do
  Enum.map(list, fn item ->
    ...
    item
    # Second element of the tuple: the list of rows for this video.
    |> elem(1)
    |> Enum.map(fn tcp ->
      # Each row must be a two-pair list; tcp is rebound to the second pair's
      # value, e.g. "TCP_HIT/200".
      [_, {_, tcp}] = tcp
      # "TCP_HIT/200" -> "HIT/200" -> ["HIT", "200"]
      String.trim(tcp, "TCP_")
      |> String.split("/")
      # There is no catch-all clause, so any status other than HIT or MISS
      # raises a CaseClauseError.
      |> case do
        ["HIT", _] -> "HIT"
        ["MISS", _] -> "MISS"
      end
    end)
    ...
  end)
end

The next step is to count each "HIT" or "MISS" and add it to an accumulated total of "HIT" or "MISS". To do that we can use Enum.reduce/3, which takes an enumerable, an accumulator and a function. With each iteration we use Map.update/4, which takes a map, a key, a default and a function as its parameters.

access-log-app.ex

# Step 3: tally the "HIT" / "MISS" strings of one video into a counts map.
def get_cache_hit_misses(list) do
  Enum.map(list, fn item ->
    ...
    item
    |> elem(1)
    |> Enum.map(fn tcp ->
      ...
    end)
    # Seeding both keys with 0 guarantees that a video with no hits (or no
    # misses) still reports an explicit 0 for that key.
    |> Enum.reduce(%{"HIT" => 0, "MISS" => 0}, fn value, acc ->
      # The default is only used when the key is absent; it is 1 so that a first
      # occurrence would be counted rather than recorded as 0.
      Map.update(acc, value, 1, &(&1 + 1))
    end)
    ...
  end)
end

The final step is to put the video_id into the accumulated map that is returned from the previous steps.

access-log-app.ex

# Step 4: attach the video id to the counts map built by the previous steps.
def get_cache_hit_misses(list) do
  Enum.map(list, fn item ->
    ...
    ...
    # Result shape per video: %{:video_id => id, "HIT" => n, "MISS" => m}
    |> Map.put(:video_id, video_id)
  end)
end

Change number into percentage with Elixir

access-log-app.ex

# Fractions go in, whole-number percent strings come out; video ids pass
# through untouched.
test "Formats TCP_hit_percentage into percent" do
  input = [
    %{:video_id => 1, "TCP_hit_percentage" => 1.0},
    %{:video_id => 2, "TCP_hit_percentage" => 1.0},
    %{:video_id => 3, "TCP_hit_percentage" => 0},
    %{:video_id => 4, "TCP_hit_percentage" => 1.0},
    %{:video_id => 5, "TCP_hit_percentage" => 0.75}
  ]

  expected = [
    %{:video_id => 1, "TCP_hit_percentage" => "100%"},
    %{:video_id => 2, "TCP_hit_percentage" => "100%"},
    %{:video_id => 3, "TCP_hit_percentage" => "0%"},
    %{:video_id => 4, "TCP_hit_percentage" => "100%"},
    %{:video_id => 5, "TCP_hit_percentage" => "75%"}
  ]

  assert integer_to_percent(input, "TCP_hit_percentage") == expected
end

Add the number dependency in access_log_app-v2/mix.exs:

mix.exs

defmodule AccessLogApp.MixProject do
  ...
  defp deps do
    [
      ...
      # Supplies Number.Percentage.number_to_percentage/2, which
      # integer_to_percent/2 calls.
      {:number, "~> 1.0"}
    ]
  end
end

Install dependencies:

bash

mix deps.get

The integer to percent function

access-log-app.ex

@doc """
Formats the fraction stored under `field_name` as a whole-number percent string.

`lines` is a list of maps that each hold `:video_id` and `field_name`, where the
value under `field_name` is a fraction such as `0.75` (or the integer `0`).

Returns a list of maps containing only `:video_id` and `field_name`, with the
fraction rendered as a string such as `"75%"`. A fraction of `0.5` yields `"50%"`.

Raises `KeyError` when a map has no `field_name` key.
"""
def integer_to_percent(lines, field_name) do
  Enum.map(lines, fn line ->
    percent =
      line
      |> Map.fetch!(field_name)
      # number_to_percentage/2 only formats the number and appends "%"; it does
      # not scale it. Convert the fraction to a percent value first instead of
      # stripping "." and "0" characters, which turned 0.5 into "5%".
      |> Kernel.*(100)
      |> Number.Percentage.number_to_percentage(precision: 0)

    %{:video_id => Map.get(line, :video_id), field_name => percent}
  end)
end

Conclusion

In this post we updated our list to just have the video_id and hit and miss counts. In tomorrow's post we will take the HIT and MISS counts and turn them into a percentage. That's it for today! If you like, please share and subscribe!

Launch Your Project

Get your project off the ground with Space-Rocket! Fill out the form below to get started.

Space-Rocket pin icon