Calculate Percentage with Elixir

Calculate Percentage with Elixir

Introduction

This is part seven of the nine post series on Processing a Log File with Elixir. If you find this article helpful, please subscribe and share 🚀

Here is our list of steps

    1. Fetch data from URL
    2. Split each new line into a list item
    3. Split each line into list items
    4. Filter items to only contain the URL and TCP_HIT/MISS
    5. Find the six-digit video id from the URL, it should be the first integer in HTTP paths of:

"example.com/04C0BF/v2/sources/content-owners/" and
"example.com/04C0BF/ads/transcodes/"

  6. Group by Video ID
  7. Get Cache Hit and Misses for each Video
  8. Calculate the Cache Hit Misses
  9. Sort by video id
  10. Print to file

Looking at our list of things to do, the next step is to “Get Cache Hit and Misses for each Video”.

Our data is now looking like this:


%{
  [video_id: 406301] => [
    [video_id: 406301, tcp: "TCP_HIT/200"],
    [video_id: 406301, tcp: "TCP_HIT/200"]
  ],
  [video_id: 308171] => [[video_id: 308171, tcp: "TCP_HIT/200"]],
  [video_id: 366084] => [
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, tcp: "TCP_HIT/206"],
    [video_id: 366084, ...],
    [...],
    ...
  ],
  [video_id: 401931] => [[video_id: 401931, tcp: "TCP_HIT/200"]],
  [video_id: 386242] => [[video_id: 386242, tcp: "TCP_HIT/200"]], 
  [video_id: 366231] => [[video_id: 366231, tcp: "TCP_HIT/200"]],
  [video_id: 191739] => [[video_id: 191739, tcp: "TCP_HIT/200"]],
  [video_id: 136715] => [[video_id: 136715, ...]],
  [video_id: 106652] => [[...], ...],
  [...] => [...],
  ...
}

Before we can do our calculation, we need to count each video_id’s hit and miss totals to look like %{:video_id => "1", "HIT" => 3, "MISS" => 0}. The test looks like this

defmodule AccessLogAppTest do
  ...
  test "Get cache hit/misses for each video_id" do
    # The fixture mirrors the grouped data shown earlier: every key is a
    # one-element keyword list ([video_id: id]). get_cache_hit_misses/1 matches
    # each entry against {[{_, video_id}], _}, so a key that also carried a
    # tcp: pair would raise a MatchError instead of producing a count.
    list = %{
      [video_id: "1"] => [
        [video_id: "1", tcp: "TCP_HIT/200"],
        [video_id: "1", tcp: "TCP_HIT/200"],
        [video_id: "1", tcp: "TCP_HIT/200"]
      ],
      [video_id: "2"] => [
        [video_id: "2", tcp: "TCP_HIT/206"],
        [video_id: "2", tcp: "TCP_HIT/206"],
        [video_id: "2", tcp: "TCP_HIT/206"]
      ],
      [video_id: "3"] => [
        [video_id: "3", tcp: "TCP_MISS/206"],
        [video_id: "3", tcp: "TCP_MISS/206"],
        [video_id: "3", tcp: "TCP_MISS/206"]
      ],
      [video_id: "4"] => [
        [video_id: "4", tcp: "TCP_HIT/206"],
        [video_id: "4", tcp: "TCP_HIT/206"],
        [video_id: "4", tcp: "TCP_HIT/206"]
      ],
      # Mixed group: one miss and two hits for the same video.
      [video_id: "5"] => [
        [video_id: "5", tcp: "TCP_MISS/206"],
        [video_id: "5", tcp: "TCP_HIT/206"],
        [video_id: "5", tcp: "TCP_HIT/206"]
      ]
    }

    result = get_cache_hit_misses(list)

    assert result == [
      %{:video_id => "1", "HIT" => 3, "MISS" => 0},
      %{:video_id => "2", "HIT" => 3, "MISS" => 0},
      %{:video_id => "3", "HIT" => 0, "MISS" => 3},
      %{:video_id => "4", "HIT" => 3, "MISS" => 0},
      %{:video_id => "5", "HIT" => 2, "MISS" => 1}
    ]
  end
  ...
end

Solution

We start with Enum.map/2 to iterate over the map. Each item is passed to the function as a {key, value} tuple, for example {[video_id: 136715], [[video_id: 136715, tcp: "TCP_HIT/200"]]}.

We start by getting the video_id by pattern matching it:


  # First step: walk the grouped data and pull out each group's video id.
  def get_cache_hit_misses(list) do
    Enum.map(list, fn item ->
      # item is a {key, value} tuple; the key is a one-element keyword list such
      # as [video_id: 136715], so this match binds video_id to the id itself.
      {[{_, video_id}], _} = item
    end)
  end

Next, to get a list of the TCP_HIT/MISS values of each video, we use elem/2 to take the second element of the tuple, which is the list of entries for that video. We then use another Enum.map/2 to get just the tcp: value. Next, we use String.trim/2, String.split/2 and a case statement to get the value of either “HIT” or “MISS”.


  # Second step: turn every entry of a group into the string "HIT" or "MISS".
  def get_cache_hit_misses(list) do
    Enum.map(list, fn item ->
      ...
      item
      # The second tuple element is the list of entries for this group.
      |> elem(1)
      |> Enum.map(fn tcp ->
        # Rebind tcp to the value of the entry's second pair, e.g. "TCP_HIT/200".
        [_, {_, tcp}] = tcp
        # Strip the "TCP_" prefix, then split on "/" and keep only the first part.
        String.trim(tcp, "TCP_")
        |> String.split("/")
        |> case do
          ["HIT", _] -> "HIT"
          ["MISS", _] -> "MISS"
        end
      end)
      ...
    end)
  end

The next step is to count each “HIT” or “MISS” and add it to an accumulated value of “HIT” or “MISS”. To do that we can use Enum.reduce/3, which takes an enumerable, an accumulator and a function. On each iteration we use Map.update/4, which takes a map, a key, a default and a function as its parameters.


  # Third step: tally the "HIT"/"MISS" strings of each group into a counts map.
  def get_cache_hit_misses(list) do
    Enum.map(list, fn items ->
      ...

      items
      |> elem(1)
      |> Enum.map(fn tcp ->
        ...
      end)
      # Map.update/4 stores its default as-is when the key is absent (the
      # function is not applied to it), so the default must be 1 - the first
      # occurrence - rather than 0. Both keys are pre-seeded with 0 here, so
      # the default only acts as a safety net.
      |> Enum.reduce(%{"HIT" => 0, "MISS" => 0}, fn value, acc ->
        Map.update(acc, value, 1, &(&1 + 1))
      end)
      ...
    end)
  end

The final step is to put the video_id into the accumulated map that is returned from the previous steps.


  # Final step: attach the video id to the result of the preceding pipeline.
  def get_cache_hit_misses(list) do
    Enum.map(list, fn items ->
      ...

      ...
      # Store the id in the piped-in map under the :video_id key.
      |> Map.put(:video_id, video_id)
    end)
  end

Change number into percentage with Elixir

  test "Formats TCP_hit_percentage into percent" do
    field = "TCP_hit_percentage"

    # Input rows hold the hit ratio as a fraction of 1.
    rows = [
      %{:video_id => 1, field => 1.0},
      %{:video_id => 2, field => 1.0},
      %{:video_id => 3, field => 0},
      %{:video_id => 4, field => 1.0},
      %{:video_id => 5, field => 0.75}
    ]

    # The same rows with every ratio rendered as a whole-number percent string.
    expected = [
      %{:video_id => 1, field => "100%"},
      %{:video_id => 2, field => "100%"},
      %{:video_id => 3, field => "0%"},
      %{:video_id => 4, field => "100%"},
      %{:video_id => 5, field => "75%"}
    ]

    assert integer_to_percent(rows, field) == expected
  end

Install dependencies
access_log_app-v2/mix.exs

defmodule AccessLogApp.MixProject do
  ...
  defp deps do
    [
      ...
      # Hex package that provides the Number.Percentage module.
      {:number, "~> 1.0"}
    ]
  end
end

Install dependencies

mix deps.get

The integer to percent function

  @doc """
  Rewrites each row so the value under `field_name` is a percent string.

  `lines` is a list of maps. The value stored under `field_name` is read as a
  fraction (`1.0` is 100%, `0.75` is 75%), multiplied by 100, rounded to the
  nearest whole number and suffixed with `"%"`.

  Each returned map contains only `:video_id` and `field_name`; any other keys
  of the input row are dropped. Raises `ArithmeticError` when the value under
  `field_name` is missing or not a number.
  """
  def integer_to_percent(lines, field_name) do
    Enum.map(lines, fn line ->
      # Scale and round numerically. Formatting the fraction as text and then
      # deleting the "." and every "0" corrupts values such as 0.5 ("5%"),
      # 0.1 ("1%") or 1.5 ("15%").
      percent = round(Map.get(line, field_name) * 100)

      %{:video_id => Map.get(line, :video_id), field_name => "#{percent}%"}
    end)
  end

Conclusion

In this post we updated our list to just have the video_id and hit and miss counts. In tomorrow’s post we will take the HIT and MISS counts and turn them into a percentage.

That’s it for today!

If you like, please share and subscribe!

Published
Categorized as Elixir

By mchavez

Michael Chavez is a web and software developer from San Francisco, California. His experience spans almost a decade, working with San Francisco Bay Area design and development agencies, and high-profile Silicon Valley start-ups and enterprises. After studying Multimedia at City College of San Francisco, Michael self-taught himself programming languages such as JavaScript, Node.js, PHP and founded the web development consultancy, Space-Rocket. Michael is currently working with the Elixir programming language.

Leave a comment

Your email address will not be published. Required fields are marked *