DEV Community

Cover image for Scrapping Bybit Kline data with Pybit
Kyle Foo
Kyle Foo

Posted on • Updated on

Scrapping Bybit Kline data with Pybit

At the time of writing, I'm using the Python Pybit v5 library to scrape Bybit kline data. The following Python code works for scraping 1-minute klines; modify it according to the interval you wish to scrape.

import pandas as pd
from datetime import date, datetime, timedelta

from pybit.unified_trading import HTTP
import numpy as np
import dateparser
import time
import math

# Shared Pybit HTTP client (testnet disabled) used for the Bybit requests below.
# API_KEY and SECRET_KEY are not defined in this snippet; they must be set
# before this statement runs.
session = HTTP(
    api_key=API_KEY,
    api_secret=SECRET_KEY,
    testnet=False,
)

def GetHistoricalData(currency, start_date, end_date, interval, *, client=None, delay=0.1):
    """Download kline bars for ``currency`` between two dates into a DataFrame.

    The range is fetched page by page (720 bars per request, "linear"
    category). After every page the start cursor moves to just past the newest
    bar received, so the pagination does not depend on the bar interval.

    NOTE(review): this relies on the kline endpoint returning bars from
    ``start`` forward when only ``start`` and ``limit`` are supplied, which is
    what the original pagination already assumed -- confirm against the Bybit
    API documentation.

    Args:
        currency: Symbol to query, e.g. ``"BTCUSDT"``.
        start_date: Start of the range (inclusive). Either a string understood
            by ``dateparser.parse`` or an object with a ``timestamp()`` method
            such as ``datetime``.
        end_date: End of the range (inclusive), same accepted forms as
            ``start_date``.
        interval: Kline interval; passed to the API as ``str(interval)``.
        client: Optional object exposing ``get_kline(...)``. Defaults to the
            module-level ``session``.
        delay: Seconds to sleep between consecutive requests to stay below the
            API rate limit. Use ``0`` to disable.

    Returns:
        A ``pandas.DataFrame`` with columns ``startTime``, ``openPrice``,
        ``highPrice``, ``lowPrice``, ``closePrice`` and ``volume``, ordered
        from oldest to newest bar. ``startTime`` is converted from epoch
        milliseconds; the other values are kept exactly as the API returned
        them. The frame is empty when no bar falls inside the range.

    Raises:
        ValueError: If a date string cannot be parsed.
    """
    columns = ['startTime', 'openPrice', 'highPrice', 'lowPrice', 'closePrice', 'volume']
    page_size = 720  # the original request size; it is sent as the API `limit`

    def _to_ms(value, label):
        # Strings go through dateparser; anything else must offer .timestamp().
        moment = dateparser.parse(value) if isinstance(value, str) else value
        if moment is None:
            raise ValueError("could not parse {}: {!r}".format(label, value))
        return int(moment.timestamp() * 1000)

    start_ts = _to_ms(start_date, "start_date")
    end_ts = _to_ms(end_date, "end_date")
    api = session if client is None else client

    rows = []
    cursor = start_ts
    while cursor <= end_ts:
        response = api.get_kline(symbol=currency, interval=str(interval), start=cursor,
                                 category="linear", limit=page_size)
        bars = response['result']['list']
        if not bars:
            # Nothing (more) available from this point onwards.
            break

        # Sort explicitly so the result is oldest-first whatever order the API used.
        ordered = sorted(bars, key=lambda bar: int(bar[0]))
        for bar in ordered:
            bar_ts = int(bar[0])
            # Keep only bars inside the requested window; the lower bound also
            # prevents duplicates if a page overlaps the previous one.
            if cursor <= bar_ts <= end_ts:
                rows.append((bar_ts, bar[1], bar[2], bar[3], bar[4], bar[5]))

        newest_ts = int(ordered[-1][0])
        if newest_ts < cursor or newest_ts >= end_ts:
            # Either the page brought no progress or the end date was reached.
            break
        cursor = newest_ts + 1

        # Time delay between pages to prevent hitting the API rate limit.
        if delay and delay > 0:
            time.sleep(delay)

    # Build the frame once instead of appending row by row.
    df = pd.DataFrame(rows, columns=columns)
    df['startTime'] = pd.to_datetime(df['startTime'], unit='ms')
    return df
Enter fullscreen mode Exit fullscreen mode

Then execute the function with the symbol, the intended start and end times, and an interval of 1 minute:

# Fetch 1-minute klines for the symbol over the chosen date range, then save
# them to a CSV file in the current directory (without the index column).
symbol = "BTCUSDT"
data = GetHistoricalData(symbol, "May 01, 2023 00:00 UTC", "July 31, 2023 23:59 UTC", 1)
csv_path = "./{}_May-01-2023-July-31-2023.csv".format(symbol)
data.to_csv(csv_path, index=False)
Enter fullscreen mode Exit fullscreen mode

See the Bybit V5 API documentation (Market, "Get Kline") for the API reference.

Top comments (0)