DEV Community

Code Challenge: Follow the Dirty Money

jorin on November 01, 2017

A shady Internet business has been discovered. The website has been made public by a whistle blower. We have enough evidence about the dirty deals...

Read full post

Florian Rohrer • Nov 1 '17

Loving it!

import json
import requests
import re

start_url = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"
matcher = re.compile(r"\$\d+[\.,]\d+")
money = 0
todo = []
seen = set()

todo.append(start_url)

while todo:
    nexturl = todo.pop()
    data = requests.get(nexturl).json()
    if data["id"] in seen:
        continue
    pos = matcher.search(data["content"]).group()[1:]
    money += float(pos.replace(",", "."))
    todo += data["links"]
    seen.add(data["id"])

print("Final amount: $%.2f" % money)

RevanProdigalKnight • Nov 2 '17 • Edited

const
  visitedLinks = new Set(),
  visitedIds = new Set(); // Just in case somebody gets tricky and adds unique links that have duplicate IDs

async function visitLink(link) {
  let total = 0;

  if (!visitedLinks.has(link)) {
    visitedLinks.add(link);

    const { id, content, links } = await (await fetch(link)).json();

    if (!visitedIds.has(id)) {
      visitedIds.add(id);

      total += parseFloat(
        content
          .match(/\$[\d,]+(?:[,.]\d+)?/)[0]
          .substring(1)
          .replace(',', '.')
      );

      for (const link of links) {
        total += await visitLink(link);
      }
    }
  }

  return total;
}

visitLink(
  'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json'
).then(console.log); // 9064.78999999999 (JS rounding errors, yay)

Carlos Gortaris • Nov 2 '17

My take:

import sys
import requests
import json
import re

trx = {}
nex = [ 'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json' ]
rgx = r".*\$([0-9]+)[,.]{1}([0-9]+)[^0-9]*"

def getAmount(s):
    global rgx
    mtc         = re.search(rgx, s)
    intpart     = mtc.group(1)
    decimalpart = mtc.group(2)
    return float("{}.{}".format(intpart, decimalpart))

while len(nex) > 0:
    url = nex.pop()
    req = requests.get(url)
    jsn = req.json()
    for link in jsn['links']:
        nex.append(link)
    idd = jsn['id']
    trx[idd] = jsn['content']

sum = 0
for i in trx.keys():
    amount  = getAmount(trx[i])
    sum     += amount

print(sum)

Yaphi Berhanu • Nov 2 '17

In JavaScript:

const transactions = {};
function getPrice(content){
    return Number(content.match(/\$[^\s"]+/)[0].replace(/(\$|\D$)/g,'').replace(/,/,'.'));
}
function getData(url){
    return fetch(url)
        .then(response => response.json())
        .then(data => {
            transactions[data.id] = getPrice(data.content);
            return Promise.all(data.links.map(getData));
        });
}
getData('https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json')
    .then(responses => {
        const totalMoney = Object.values(transactions).reduce((tot, cur) => tot + cur, 0).toFixed(2);
        console.log(`total money: $${totalMoney}`);
    });

Ryan Palo • Nov 1 '17

I love whenever anybody posts challenges like this! Also, having the decimal separator be both . and , was tricksy.

require 'bigdecimal'  # Use decimals when money is involved
require 'json'
require 'net/http'
require 'set'

# Hunts down linked transactions without double-counting them
class TransactionFinder
  attr_reader :found

  def initialize(first_url)
    @pending = [first_url]
    @found = {
      # 'fd0d929f'... : '$1699,15'
    }
    @hit_uris = Set.new
  end

  def hunt
    until @pending.empty?
      current = @pending.pop
      next if @hit_uris.include?(current)
      @hit_uris.add(current)

      result = get(current)
      @found[result['id']] = result['content'].scan(/\$[0-9,.]+/).first
      @pending.concat(result['links'])
    end
  end

  def get(uri)
    result_string = Net::HTTP.get_response(URI(uri))
    result = JSON.parse(result_string.body)
  end

  def write_transactions(filename)
    File.open(filename, 'w') { |f| f.write(JSON.pretty_generate(@found)) }
  end

  def total
    @found.values.reduce(BigDecimal.new('0')) do |sum, current|
      dollars, cents = current.scan(/[0-9]+/)
      sum + BigDecimal.new("#{dollars}.#{cents}")
    end
  end
end

first_uri = 'https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json'
t = TransactionFinder.new(first_uri)
t.hunt
t.write_transactions('data.json')
puts "$#{t.total.to_f}"

SPOILER

$9064.79

Stanley Nguyen • Nov 3 '17

My solution using goroutines for speed :)

package main

import (
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "os"
    "regexp"
    "strconv"
    "strings"
)

var r, _ = regexp.Compile("\\$[0-9]+(\\,|\\.)[0-9]{0,2}")

type transaction struct {
    Content string   `json:"content"`
    Links   []string `json:"links"`
}

func crawl(sum chan<- float64, URLsChan chan<- []string, startingURL string) {
    response, err := http.Get(startingURL)
    if err != nil {
        log.Fatal(err)
        os.Exit(1)
    }

    defer response.Body.Close()
    resBuf, err := ioutil.ReadAll(response.Body)
    if err != nil {
        log.Fatal(err)
        os.Exit(1)
    }

    var trans transaction
    json.Unmarshal(resBuf, &trans)

    foundStrArr := r.FindAllString(trans.Content, -1)
    if len(foundStrArr) == 0 {
        sum <- 0
    } else {
        for _, elem := range foundStrArr {
            elem = strings.Replace(elem, ",", ".", -1)
            val, _ := strconv.ParseFloat(elem[1:], 64)
            sum <- val
        }
    }
    URLsChan <- trans.Links
}

func main() {
    sum := make(chan float64)
    urlsChan := make(chan []string)
    urlsList := []string{"https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"}
    var totalAmt float64

    for len(urlsList) > 0 {
        for _, url := range urlsList {
            go crawl(sum, urlsChan, url)
        }
        newUrlsList := []string{}
        for _ = range urlsList {
            totalAmt += <-sum
            newUrlsList = append(newUrlsList, <-urlsChan...)
        }
        urlsList = newUrlsList
    }
    fmt.Println("Total amount is", totalAmt)
}

P/s: Sorry for the ugly code :D It was written in a hurry

jorin • Nov 3 '17

Nice! I wrote a similar version but using a sync.WaitGroup and a separate constant number of workers to parallelize the download. You can find it here.

Stanley Nguyen • Nov 3 '17

One possible way is to further optimize by let different "layers" of json object urls running "concurrently". Nevertheless, I haven't come up with an actual implementation (as you can see, right now my implementation only crawl one by one "layer" of gist urls)

Richard Orelup • Nov 5 '17

My quick PHP solution

<?php

$initialTransaction = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json";
$transactions = array();

function followTheMoney($transactionUrl) {
  global $transactions;

  $ch = curl_init();
  curl_setopt($ch, CURLOPT_URL, $transactionUrl);
  curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
  $transactionJson = curl_exec($ch);
  curl_close($ch);

  $transaction = json_decode($transactionJson, true);
  $amount = str_replace(",",".",explode(" ",end(explode('$',$transaction['content'])))[0]);
  $transactions[$transaction['id']] = $amount;

  foreach ($transaction['links'] as $link) {
    if (!array_key_exists(explode(".json",end(explode("/",$link)))[0],$transactions)) {
      followTheMoney($link);
    }
  }
}

followTheMoney($initialTransaction);

$total = array_sum($transactions);

echo "Total - " . $total . "\n\n";

?>

Tobias Salzmann • Nov 4 '17 • Edited

Here's a Scala version, asynchronous, concurrent, non-blocking with async/await.
Using dispatch for requests and circe for json decoding.

import dispatch.Defaults._
import dispatch._
import io.circe.generic.auto._
import io.circe.parser._

import scala.async.Async.{async, await}
import scala.concurrent.Future

object TransactionTotal {
  val initialUrl = "https://gist.githubusercontent.com/jorinvo/6f68380dd07e5db3cf5fd48b2465bb04/raw/c02b1e0b45ecb2e54b36e4410d0631a66d474323/fd0d929f-966f-4d1a-89cd-feee5a1c5347.json"

  def futureTotal(url: String = initialUrl): Future[BigDecimal] =
      getAllTransactions(url)
          .map(totalAmount)

  private case class Transaction(id: String, content: String, links: List[String]) {
    def amount = {
      val pattern = ".*\\$([0-9\\.\\,]+[0-9]).*".r
      val pattern(value) = content
      BigDecimal(value.replaceAll(",", "."))
    }
  }

  private def getTransaction(requestUrl: String): Future[Transaction] = async {
    val json = await(Http.default(url(requestUrl) OK as.String))
    decode[Transaction](json).toTry.get
  }

  private def getAllTransactions(url: String): Future[List[Transaction]] = async {
    val transaction = await(getTransaction(url))
    val nestedTransactions = await(Future.sequence(transaction.links.map(getAllTransactions)))
    val transactions = nestedTransactions.flatten
    transaction :: transactions
  }

  private def totalAmount(transactions: List[Transaction]) = {
    transactions
      .distinct
      .map(_.amount)
      .sum
  }
}