Calculate Percentage with Elixir

Calculate Percentage with Elixir

Introduction

This is part seven of the nine-post series on Processing a Log File with Elixir. If you find this article helpful, please subscribe and share 🚀 Here is our list of steps:
    1. Fetch data from URL
    2. Split each new line into a list item
    3. Split each line into list items
    4. Filter items to only contain the URL and TCP_HIT/MISS
    5. Find the six-digit video id from the URL, it should be the first integer in HTTP paths of:
"example.com/04C0BF/v2/sources/content-owners/" and "example.com/04C0BF/ads/transcodes/"
    6. Group by Video ID
    7. Get Cache Hit and Misses for each Video
    8. Calculate the Cache Hit/Miss percentage
    9. Sort by video id
    10. Print to file
Looking at our list of things to do, the next step is to "Get Cache Hit and Misses for each Video". Our data is now looking like this:

%{
  [video_id: 406301] => [
    [video_id: 406301, tcp: "TCP_HIT/200"],
    [video_id: 406301, tcp: "TCP_HIT/200"]
  ],
  [video_id: 308171] => [[video_id: 308171, tcp: "TCP_HIT/200"]],
  [video_id: 366084] => [
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, ...],
    [...],
    ...
  ],
  [video_id: 401931] => [[video_id: 401931, tcp: "TCP_HIT/200"]],
  [video_id: 386242] => [[video_id: 386242, tcp: "TCP_HIT/200"]], 
  [video_id: 366231] => [[video_id: 366231, tcp: "TCP_HIT/200"]],
  [video_id: 191739] => [[video_id: 191739, tcp: "TCP_HIT/200"]],
  [video_id: 136715] => [[video_id: 136715, ...]],
  [video_id: 106652] => [[...], ...],
  [...] => [...],
  ...
}
Before we can do our calculation, we need to count each video_id's hits and misses so that each entry looks like %{:video_id => "1", "HIT" => 3, "MISS" => 0}. The test looks like this:
defmodule AccessLogAppTest do
  ...
  # The fixture mirrors the output of the "Group by Video ID" step: a map whose
  # keys are single-pair `[video_id: id]` keyword lists and whose values are
  # the `[video_id: id, tcp: status]` entries grouped under that video.
  # The keys must hold only the :video_id pair, because get_cache_hit_misses
  # pattern matches each key as a one-element keyword list.
  test "Get cache hit/misses for each video_id" do
    list = %{
      [video_id: "1"] => [
        [video_id: "1", tcp: "TCP_HIT/200"],
        [video_id: "1", tcp: "TCP_HIT/200"],
        [video_id: "1", tcp: "TCP_HIT/200"]
      ],
      [video_id: "2"] => [
        [video_id: "2", tcp: "TCP_HIT/206"],
        [video_id: "2", tcp: "TCP_HIT/206"],
        [video_id: "2", tcp: "TCP_HIT/206"]
      ],
      [video_id: "3"] => [
        [video_id: "3", tcp: "TCP_MISS/206"],
        [video_id: "3", tcp: "TCP_MISS/206"],
        [video_id: "3", tcp: "TCP_MISS/206"]
      ],
      [video_id: "4"] => [
        [video_id: "4", tcp: "TCP_HIT/206"],
        [video_id: "4", tcp: "TCP_HIT/206"],
        [video_id: "4", tcp: "TCP_HIT/206"]
      ],
      # Mixed group: one miss and two hits for the same video.
      [video_id: "5"] => [
        [video_id: "5", tcp: "TCP_MISS/206"],
        [video_id: "5", tcp: "TCP_HIT/206"],
        [video_id: "5", tcp: "TCP_HIT/206"]
      ]
    }
    result = get_cache_hit_misses(list)
    # NOTE(review): the expected order relies on Enum.map/2 visiting this small
    # map's entries in key order; Elixir does not guarantee map ordering.
    assert result ==  [
      %{:video_id => "1", "HIT" => 3, "MISS" => 0},
      %{:video_id => "2", "HIT" => 3, "MISS" => 0},
      %{:video_id => "3", "HIT" => 0, "MISS" => 3},
      %{:video_id => "4", "HIT" => 3, "MISS" => 0},
      %{:video_id => "5", "HIT" => 2, "MISS" => 1}
    ]
  end
  ...
end

Solution

We start with Enum.map/2 to iterate over the grouped data. Because the data is a map, each item arrives as a {key, value} tuple, for example {[video_id: 136715], [[video_id: 136715, tcp: "TCP_HIT/200"]]}. We start by getting the video_id by pattern matching it:

  # Step 1 of get_cache_hit_misses/1: bind the video id of every group.
  #
  # `list` is the grouped map; Enum.map/2 hands each entry to the function as a
  # `{key, entries}` tuple. At this stage the match result (the tuple itself)
  # is what each iteration returns.
  def get_cache_hit_misses(list) do
    Enum.map(list, fn item ->
      # Match on the :video_id key explicitly and allow further pairs after it,
      # so a key such as `[video_id: "1", tcp: "TCP_HIT/200"]` binds the id
      # instead of raising a MatchError.
      {[{:video_id, video_id} | _], _} = item
    end)
  end
Next, to get a list of the TCP_HIT/MISS values of each video, we take the second element of the tuple (the list of entries for that video) with elem/2. We then use another Enum.map/2 to get just the tcp: value. Finally, we use String.trim/2, String.split/2 and a case statement to get the value of either "HIT" or "MISS".

  # Step 2 of get_cache_hit_misses/1: reduce every entry of a group to the
  # string "HIT" or "MISS". The `...` lines stand for code elided in the article.
  def get_cache_hit_misses(list) do
    Enum.map(list, fn item ->
      ...
      # The second tuple element is the list of entries grouped under this key.
      item
      |> elem(1)
      |> Enum.map(fn tcp ->
        # Each entry is a two-pair keyword list; keep the second pair's value.
        [_, {_, tcp}] = tcp
        # Strip "TCP_" and split on "/": "TCP_HIT/200" becomes ["HIT", "200"].
        String.trim(tcp, "TCP_")
        |> String.split("/")
        # Only HIT and MISS are handled; any other value raises CaseClauseError.
        |> case do
          ["HIT", _] -> "HIT"
          ["MISS", _] -> "MISS"
        end
      end)
      ...
    end)
  end
The next step is to count each "HIT" or "MISS" and add it to an accumulated total of "HIT" or "MISS". To do that we can use Enum.reduce/3, which takes an enumerable, an accumulator and a function. With each iteration we use Map.update/4, which takes a map, a key, a default and a function as its parameters.

  # Step 3 of get_cache_hit_misses/1: tally the "HIT"/"MISS" strings of each
  # group into a map of counts. The `...` lines stand for code elided in the
  # article.
  def get_cache_hit_misses(list) do
    Enum.map(list, fn items ->
      ...

      items
      |> elem(1)
      |> Enum.map(fn tcp ->
        ...
      end)
      # Seed both counters so a group without hits (or misses) still reports 0.
      |> Enum.reduce(%{"HIT" => 0, "MISS" => 0}, fn value, acc ->
        # The default is what Map.update/4 stores when the key is absent, so it
        # must be 1 (the first occurrence), not 0.
        Map.update(acc, value, 1, &(&1 + 1))
      end)
      ...
    end)
  end
The final step is to put the video_id into the accumulated map that is returned from the previous steps.

  # Step 4 of get_cache_hit_misses/1: attach the video id to the result.
  # The `...` lines stand for code elided in the article.
  def get_cache_hit_misses(list) do
    Enum.map(list, fn items ->
      ...

      ...
      # Add the video id to the map produced by the preceding pipeline step.
      |> Map.put(:video_id, video_id)
    end)
  end

Change number into percentage with Elixir

  # integer_to_percent/2 should turn the ratio stored under the given field
  # into a whole-percent string while keeping each entry's :video_id.
  test "Formats TCP_hit_percentage into percent" do
    field = "TCP_hit_percentage"

    input = [
      %{:video_id => 1, field => 1.0},
      %{:video_id => 2, field => 1.0},
      %{:video_id => 3, field => 0},
      %{:video_id => 4, field => 1.0},
      %{:video_id => 5, field => 0.75}
    ]

    expected = [
      %{:video_id => 1, field => "100%"},
      %{:video_id => 2, field => "100%"},
      %{:video_id => 3, field => "0%"},
      %{:video_id => 4, field => "100%"},
      %{:video_id => 5, field => "75%"}
    ]

    assert integer_to_percent(input, field) == expected
  end
Add the dependency to access_log_app-v2/mix.exs:
defmodule AccessLogApp.MixProject do
  ...
  defp deps do
    [
      ...
      # Number package from Hex; provides Number.Percentage.number_to_percentage/2.
      {:number, "~> 1.0"}
    ]
  end
end
Install dependencies
mix deps.get
The integer to percent function
  # Formats the ratio stored under `field_name` as a whole-percent string.
  #
  # `lines` is a list of maps; each map holds a number under `field_name`
  # (for example 0.75, 1.0 or the integer 0) and a video id under :video_id.
  # Returns a list of maps containing only :video_id and `field_name`, where
  # the value is now a string such as "75%".
  #
  # Raises KeyError when a map has no `field_name` key.
  def integer_to_percent(lines, field_name) do
    Enum.map(lines, fn line ->
      # Scale and round numerically. Formatting the ratio first and then
      # stripping "." and "0" characters from the string corrupts values such
      # as 0.5 ("5%" instead of "50%") and 1.5 ("15%" instead of "150%").
      percent =
        line
        |> Map.fetch!(field_name)
        |> Kernel.*(100)
        |> round()

      %{:video_id => Map.get(line, :video_id), field_name => "#{percent}%"}
    end)
  end

Conclusion

In this post we updated our list to just have the video_id and hit and miss counts. In tomorrow's post we will take the HIT and MISS counts and turn them into a percentage. That's it for today! If you like, please share and subscribe!