loading...
Cover image for Code Challenge: Follow the Dirty Money

Code Challenge: Follow the Dirty Money

jorinvo profile image jorin ・1 min read

A shady Internet business has been discovered.

The website has been made public by a whistle blower. We have enough evidence about the dirty deals they did. But to charge them we need to get hands on precise numbers about the transactions that happened on their platform.

Unfortunately no record of the transactions could be seized so far. The only hint we have is this one transaction:

fd0d929f-966f-4d1a-89cd-feee5a1c5347.json

What we need is the total of all transactions in Dollar. Can you trace down all other transactions and get the total?

Be careful to count each transaction only once, even if it is linked multiple times. You can use whatever tool works best for you.

Please share the total and your solution below!

Posted on by:

Discussion

markdown guide
 

Loving it!

import json
import requests
import re

start_url = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
matcher = re.compile(r"\$\d+[\.,]\d+")
money = 0
todo = []
seen = set()

todo.append(start_url)

while todo:
    nexturl = todo.pop()
    data = requests.get(nexturl).json()
    if data["id"] in seen:
        continue
    pos = matcher.search(data["content"]).group()[1:]
    money += float(pos.replace(",", "."))
    todo += data["links"]
    seen.add(data["id"])

print("Final amount: $%.2f" % money)
 
const
  visitedLinks = new Set(),
  visitedIds = new Set(); // Just in case somebody gets tricky and adds unique links that have duplicate IDs

async function visitLink(link) {
  let total = 0;

  if (!visitedLinks.has(link)) {
    visitedLinks.add(link);

    const { id, content, links } = await (await fetch(link)).json();

    if (!visitedIds.has(id)) {
      visitedIds.add(id);

      total += parseFloat(
        content
          .match(/\$[\d,]+(?:[,.]\d+)?/)[0]
          .substring(1)
          .replace(',', '.')
      );

      for (const link of links) {
        total += await visitLink(link);
      }
    }
  }

  return total;
}

visitLink(
  'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json'
).then(console.log); // 9064.78999999999 (JS rounding errors, yay)
 

My take:

import sys
import requests
import json
import re

trx = {}
nex = [ 'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json' ]
rgx = r".*\$([0-9]+)[,.]{1}([0-9]+)[^0-9]*"

def getAmount(s):
    global rgx
    mtc         = re.search(rgx, s)
    intpart     = mtc.group(1)
    decimalpart = mtc.group(2)
    return float("{}.{}".format(intpart, decimalpart))

while len(nex) > 0:
    url = nex.pop()
    req = requests.get(url)
    jsn = req.json()
    for link in jsn['links']:
        nex.append(link)
    idd = jsn['id']
    trx[idd] = jsn['content']

sum = 0
for i in trx.keys():
    amount  = getAmount(trx[i])
    sum     += amount

print(sum)
 

In JavaScript:

const transactions = {};
function getPrice(content){
    return Number(content.match(/\$[^\s"]+/)[0].replace(/(\$|\D$)/g,'').replace(/,/,'.'));
}
function getData(url){
    return fetch(url)
        .then(response => response.json())
        .then(data => {
            transactions[data.id] = getPrice(data.content);
            return Promise.all(data.links.map(getData));
        });
}
getData('https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json')
    .then(responses => {
        const totalMoney = Object.values(transactions).reduce((tot, cur) => tot + cur, 0).toFixed(2);
        console.log(`total money: $${totalMoney}`);
    });

 

I love whenever anybody posts challenges like this! Also, having the decimal separator be both . and , was tricksy.

require 'bigdecimal'  # Use decimals when money is involved
require 'json'
require 'net/http'
require 'set'

# Hunts down linked transactions without double-counting them
class TransactionFinder
  attr_reader :found

  def initialize(first_url)
    @pending = [first_url]
    @found = {
      # 'fd0d929f'... : '$1699,15'
    }
    @hit_uris = Set.new
  end

  def hunt
    until @pending.empty?
      current = @pending.pop
      next if @hit_uris.include?(current)
      @hit_uris.add(current)

      result = get(current)
      @found[result['id']] = result['content'].scan(/\$[0-9,.]+/).first
      @pending.concat(result['links'])
    end
  end

  def get(uri)
    result_string = Net::HTTP.get_response(URI(uri))
    result = JSON.parse(result_string.body)
  end

  def write_transactions(filename)
    File.open(filename, 'w') { |f| f.write(JSON.pretty_generate(@found)) }
  end

  def total
    @found.values.reduce(BigDecimal.new('0')) do |sum, current|
      dollars, cents = current.scan(/[0-9]+/)
      sum + BigDecimal.new("#{dollars}.#{cents}")
    end
  end
end

first_uri = 'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json'
t = TransactionFinder.new(first_uri)
t.hunt
t.write_transactions('data.json')
puts "$#{t.total.to_f}"

SPOILER

$9064.79

 

My solution using goroutines for speed :)

package main

import (
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "os"
    "regexp"
    "strconv"
    "strings"
)

var r, _ = regexp.Compile("\\$[0-9]+(\\,|\\.)[0-9]{0,2}")

type transaction struct {
    Content string   `json:"content"`
    Links   []string `json:"links"`
}

func crawl(sum chan<- float64, URLsChan chan<- []string, startingURL string) {
    response, err := http.Get(startingURL)
    if err != nil {
        log.Fatal(err)
        os.Exit(1)
    }

    defer response.Body.Close()
    resBuf, err := ioutil.ReadAll(response.Body)
    if err != nil {
        log.Fatal(err)
        os.Exit(1)
    }

    var trans transaction
    json.Unmarshal(resBuf, &trans)

    foundStrArr := r.FindAllString(trans.Content, -1)
    if len(foundStrArr) == 0 {
        sum <- 0
    } else {
        for _, elem := range foundStrArr {
            elem = strings.Replace(elem, ",", ".", -1)
            val, _ := strconv.ParseFloat(elem[1:], 64)
            sum <- val
        }
    }
    URLsChan <- trans.Links
}

func main() {
    sum := make(chan float64)
    urlsChan := make(chan []string)
    urlsList := []string{"https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"}
    var totalAmt float64

    for len(urlsList) > 0 {
        for _, url := range urlsList {
            go crawl(sum, urlsChan, url)
        }
        newUrlsList := []string{}
        for _ = range urlsList {
            totalAmt += <-sum
            newUrlsList = append(newUrlsList, <-urlsChan...)
        }
        urlsList = newUrlsList
    }
    fmt.Println("Total amount is", totalAmt)
}

P/s: Sorry for the ugly code :D It was written in a hurry

 

Nice! I wrote a similar version but using a sync.WaitGroup and a separate constant number of workers to parallelize the download. You can find it here.

 

One possible way is to further optimize by let different "layers" of json object urls running "concurrently". Nevertheless, I haven't come up with an actual implementation (as you can see, right now my implementation only crawl one by one "layer" of gist urls)

 

My quick PHP solution

<?php

$initialTransaction = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json";
$transactions = array();

function followTheMoney($transactionUrl) {
  global $transactions;

  $ch = curl_init();
  curl_setopt($ch, CURLOPT_URL, $transactionUrl);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  $transactionJson = curl_exec($ch);
  curl_close($ch);

  $transaction = json_decode($transactionJson, true);
  $amount = str_replace(",",".",explode(" ",end(explode('$',$transaction['content'])))[0]);
  $transactions[$transaction['id']] = $amount;

  foreach ($transaction['links'] as $link) {
    if (!array_key_exists(explode(".json",end(explode("/",$link)))[0],$transactions)) {
      followTheMoney($link);
    }
  }
}

followTheMoney($initialTransaction);

$total = array_sum($transactions);

echo "Total - " . $total . "\n\n";

?>
 

Here's a Scala version, asynchronous, concurrent, non-blocking with async/await.
Using dispatch for requests and circe for json decoding.

import dispatch.Defaults._
import dispatch._
import io.circe.generic.auto._
import io.circe.parser._

import scala.async.Async.{async, await}
import scala.concurrent.Future

object TransactionTotal {
  val initialUrl = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"

  def futureTotal(url: String = initialUrl): Future[BigDecimal] =
      getAllTransactions(url)
          .map(totalAmount)

  private case class Transaction(id: String, content: String, links: List[String]) {
    def amount = {
      val pattern = ".*\\$([0-9\\.\\,]+[0-9]).*".r
      val pattern(value) = content
      BigDecimal(value.replaceAll(",", "."))
    }
  }

  private def getTransaction(requestUrl: String): Future[Transaction] = async {
    val json = await(Http.default(url(requestUrl) OK as.String))
    decode[Transaction](json).toTry.get
  }

  private def getAllTransactions(url: String): Future[List[Transaction]] = async {
    val transaction = await(getTransaction(url))
    val nestedTransactions = await(Future.sequence(transaction.links.map(getAllTransactions)))
    val transactions = nestedTransactions.flatten
    transaction :: transactions
  }

  private def totalAmount(transactions: List[Transaction]) = {
    transactions
      .distinct
      .map(_.amount)
      .sum
  }
}
 

Hi! I think i solved it :) and it was very funny!

End of track: $146.091,89

The code is in my repo:

github.com/DiegoMGar/LearningChall...

 

Really had tons of fun solving it! Thanks for learning opportunity :D

 

Interesting! Im doing it :)
TY

 

You can have a look at some existing solutions over here.