DEV Community

loading...

MongoDB for MySQL People

mdminhazulhaque profile image Md. Minhazul Haque ・5 min read

I am a MySQL guy. I can query MySQL, BigQuery or SQL-like databases for my day to day business. Last week, I faced a challenge where I needed to extract some information from a production MongoDB server that involves aggregation. I had no idea how MongoDB works or how to run CRUD operation on such database. So obviously, I spent my whole weekend in learning MongoDB. In this post I am going to share the MongoDB queries I learnt, and the equivalent MySQL queries as well.

I used MongoDB v3.6, for both daemon and cli, so some functions may not work with your current setup

Populate Data and Load into MongoDB

I used the following piece of code to scrap my Github profile information into a single JSON file.

import requests
import json

username = 'mdminhazulhaque'
repos = []
page = 1

while True:
    params = (
        ('page', page),
        ('type', 'owner'),
        ('per_page', 100)
    )
    response = requests.get('https://api.github.com/users/{username}/repos'\
        .format(username=username), params=params)
    for single in response.json():
        repos.append({
            "name": single.get('name', None),
            "description": single.get('description', None),
            "created_at": single.get('created_at', None),
            "language": single.get('language', None),
            "stargazers_count": single.get('stargazers_count', None)
        })
    if len(response.json()) < 100:
        break
    else:
        page += 1
with open("github.json", "w") as fp:
    json.dump(repos, fp, indent=4)
Enter fullscreen mode Exit fullscreen mode

Then I loaded the JSON data into MongoDB using mongoimport.

mongoimport --db local --collection repos --drop --jsonArray --file github.json 
Enter fullscreen mode Exit fullscreen mode

One record from the repo collection looks like the following.

$ mongo 127.0.0.1/local
> db.repos.findOne()
{
        "_id" : ObjectId("5f1cfca24662cbc419b65e29"),
        "name" : "a-pdf-reader",
        "description" : "A poppler based pdf reader written in Qt4",
        "created_at" : "2012-10-07T20:44:48Z",
        "language" : "C++",
        "stargazers_count": null
}
Enter fullscreen mode Exit fullscreen mode

Let's start playing with MongoDB.

SELECT

select * from repos

> db.repos.find({})

{ "_id" : ObjectId("5f1cfca24662cbc419b65e2a"), "name" : "apt-sync", "created_at" : "2017-11-19T04:57:23Z", "language" : "Python", "stargazers_count" : 0 }
{ "_id" : ObjectId("5f1cfca24662cbc419b65e2b"), "name" : "aws-cli-cheatsheet", "created_at" : "2020-02-27T16:38:21Z", "language" : null, "stargazers_count" : 52 }
{ "_id" : ObjectId("5f1cfca24662cbc419b65e2c"), "name" : "banglalion-wimax-mac", "created_at" : "2015-02-25T19:15:48Z", "language" : "HTML", "stargazers_count" : 0 }
{ "_id" : ObjectId("5f1cfca24662cbc419b65e2d"), "name" : "annoying-bn-text", "created_at" : "2019-06-05T17:25:05Z", "language" : "HTML", "stargazers_count" : 1 }
{ "_id" : ObjectId("5f1cfca24662cbc419b65e2e"), "name" : "banglalionwimaxapi", "created_at" : "2018-10-16T13:03:53Z", "language" : "Python", "stargazers_count" : 0 }
{ "_id" : ObjectId("5f1cfca24662cbc419b65e2f"), "name" : "bcm-wimax-dkms", "created_at" : "2017-05-20T18:22:07Z", "language" : "C", "stargazers_count" : 0 }
{ "_id" : ObjectId("5f1cfca24662cbc419b65e30"), "name" : "bd-mrp-api", "created_at" : "2018-12-31T20:26:48Z", "language" : "Python", "stargazers_count" : 1 }
Enter fullscreen mode Exit fullscreen mode

select name, description from repos

> db.repos.find({}, {_id:0, name:1, description:1})

{ "name" : "a-pdf-reader", "description" : "A poppler based pdf reader written in Qt4" }
{ "name" : "apt-sync", "description" : ":gift: Sync APT installed package list across different machines" }
{ "name" : "aws-cli-cheatsheet", "description" : ":cloud: AWS CLI + JQ = Make life easier" }
{ "name" : "banglalion-wimax-mac", "description" : ":satellite: Banglalion WiMAX on Mac OS X" }
{ "name" : "annoying-bn-text", "description" : "Piss of your friends with awkwardly written Bengali text" }
Enter fullscreen mode Exit fullscreen mode

select name, description from repos limit 2

> db.repos.aggregate([{$project:{_id:0, name:1, description:1}}, {$limit:5}])

{ "name" : "a-pdf-reader", "description" : "A poppler based pdf reader written in Qt4" }
{ "name" : "apt-sync", "description" : ":gift: Sync APT installed package list across different machines" }
Enter fullscreen mode Exit fullscreen mode

select name, description from repos order by name

> db.repos.find({}, {_id:0, name:1, description:1}).sort({name:1})

{ "name" : "Android-Javascript-Bridge", "description" : "Android and Javascript two way communication" }
{ "name" : "Android-JavascriptInterface-Sample", "description" : "Call Java (Android) methods from Webview using Javascript" }
{ "name" : "Bilai-PnP-Gui", "description" : ":cat2: GUI for Bilai PnP Modems" }
{ "name" : "CPPad", "description" : "Write and run C++ codes, instantly!" }
{ "name" : "CSE10Routine", "description" : "Tabbed routine app" }
Enter fullscreen mode Exit fullscreen mode

select name, stargazers_count from repos order by stargazers_count desc

> db.repos.find({}, {_id:0, name:1, stargazers_count:1}).sort({stargazers_count:-1})

{ "name" : "aws-cli-cheatsheet", "stargazers_count" : 52 }
{ "name" : "probhat-osx", "stargazers_count" : 23 }
{ "name" : "PyQt-BPNN", "stargazers_count" : 22 }
{ "name" : "foreach", "stargazers_count" : 14 }
{ "name" : "html-table-to-json", "stargazers_count" : 12 }
Enter fullscreen mode Exit fullscreen mode

select name, stargazers_count from repos where stargazers_count > 20 order by stargazers_count

> db.repos.find({ stargazers_count: { $gt: 20} }, {_id:0, name:1, stargazers_count:1}).sort({stargazers_count:-1})

{ "name" : "aws-cli-cheatsheet", "stargazers_count" : 52 }
{ "name" : "probhat-osx", "stargazers_count" : 23 }
{ "name" : "PyQt-BPNN", "stargazers_count" : 22 }
Enter fullscreen mode Exit fullscreen mode

select name, stargazers_count from repos where language = "Python"

> db.repos.find({ language: "Python" }, {_id:0, name:1, stargazers_count:1})

{ "name" : "apt-sync", "stargazers_count" : 0 }
{ "name" : "banglalionwimaxapi", "stargazers_count" : 0 }
{ "name" : "bd-mrp-api", "stargazers_count" : 1 }
{ "name" : "flightaware-cli", "stargazers_count" : 0 }
{ "name" : "html-table-to-json", "stargazers_count" : 12 }
Enter fullscreen mode Exit fullscreen mode

select name, description from repos where description like "%cli%"

> db.repos.find({ description: /cli/i }, {_id:0, name:1, description:1})

{ "name" : "aws-cli-cheatsheet", "description" : ":cloud: AWS CLI + JQ = Make life easier" }
{ "name" : "ClipMonitor", "description" : "A tiny clip monitor app built with Qt5" }
{ "name" : "foreach", "description" : ":repeat: Read lines from file/stdin and execute them as CLI argument" }
{ "name" : "kakitangan-cli", "description" : ":briefcase: CLI for Kakitangan app, the online HR Software for Malaysian businesses" }
{ "name" : "librs232", "description" : ":paperclip: RS232 Library in C" }
Enter fullscreen mode Exit fullscreen mode

select language, count(*) as count, description from repos group by language order by count

> db.repos.aggregate([{ $group: { _id: "$language", count: { $sum: 1 } } },{ $sort: { count: -1 } }])

{ "_id" : "C++", "count" : 61 }
{ "_id" : "Python", "count" : 25 }
{ "_id" : "HTML", "count" : 12 }
{ "_id" : "Java", "count" : 12 }
{ "_id" : "Shell", "count" : 10 }
Enter fullscreen mode Exit fullscreen mode

select language, sum(stargazers_count) as stars, description from repos group by language order by stars

> db.repos.aggregate([{ $group: { _id: "$language", stars: { $sum: "$stargazers_count" } } },{ $sort: { stars: -1 } }])

{ "_id" : "C++", "stars" : 72 }
{ "_id" : null, "stars" : 52 }
{ "_id" : "Python", "stars" : 47 }
{ "_id" : "Shell", "stars" : 43 }
{ "_id" : "HTML", "stars" : 19 }
Enter fullscreen mode Exit fullscreen mode

select name, substr(created_at, 0, 4) as created_at_year from repos order by created_at_year desc

> db.repos.aggregate([{$addFields:{"created_at_year":{$substr:["$created_at", 0, 4]}}}, {$project: {_id:0, name:1, created_at_year:1}}, {$sort: {created_at_year:-1}}])

{ "name" : "aws-cli-cheatsheet", "created_at_year" : "2020" }
{ "name" : "gitfolio", "created_at_year" : "2020" }
{ "name" : "messenger-bot", "created_at_year" : "2020" }
{ "name" : "urctl", "created_at_year" : "2020" }
{ "name" : "annoying-bn-text", "created_at_year" : "2019" }
{ "name" : "esp32-mqtt-ir-remote", "created_at_year" : "2019" }
Enter fullscreen mode Exit fullscreen mode

select substr(created_at, 0, 4)) as year, count(*) as count from repos group by year order by count desc

> db.repos.aggregate([{$addFields:{"created_at_year":{$substr:["$created_at", 0, 4]}}}, { $group: { _id: "$created_at_year", count: { $sum: 1 } } },{ $sort: { count: -1 } }])

{ "_id" : "2015", "count" : 41 }
{ "_id" : "2016", "count" : 35 }
{ "_id" : "2012", "count" : 18 }
{ "_id" : "2014", "count" : 14 }
{ "_id" : "2013", "count" : 12 }
{ "_id" : "2019", "count" : 10 }
Enter fullscreen mode Exit fullscreen mode

select name, unix_timestamp(str_to_date(created_at, '%Y-%M-%dT%h:%m:%sz')) from repos

> db.repos.aggregate([{$addFields: {date: { $dateFromString: { dateString: "$created_at" }}}}, {$project: {_id:0, created_at_usec: { $subtract: [ "$date", new Date("1970-01-01") ] }, name:1}}])

{ "name" : "a-pdf-reader", "created_at_usec" : NumberLong("1349642688000") }
{ "name" : "apt-sync", "created_at_usec" : NumberLong("1511067443000") }
{ "name" : "aws-cli-cheatsheet", "created_at_usec" : NumberLong("1582821501000") }
{ "name" : "banglalion-wimax-mac", "created_at_usec" : NumberLong("1424891748000") }
{ "name" : "annoying-bn-text", "created_at_usec" : NumberLong("1559755505000") }
Enter fullscreen mode Exit fullscreen mode

DELETE

delete from repose where language = "C++"

> db.repos.remove({language:"C++"})
Enter fullscreen mode Exit fullscreen mode

ALTER

alter table repos drop column created_at

> db.repos.update({},{$unset:{created_at:""}})
Enter fullscreen mode Exit fullscreen mode

alter table repos insert column foo int

> db.repos.update({}, {$set: {foo:0}}, {multi:1})
Enter fullscreen mode Exit fullscreen mode

That's all for today. Feel free to share if any of the queries can be improved. Thanks for reading.

Discussion

pic
Editor guide
Collapse
djnitehawk profile image
Đĵ ΝιΓΞΗΛψΚ

nice post. once you get past the initial learning curve of mongodb, it truly is a thing of beauty imho :-)

Collapse
mdminhazulhaque profile image