DEV Community

ILshat Khamitov
ILshat Khamitov

Posted on

Create scraper plugin for telegram bot on nestjs

Create empty plugin

Add new library plugins

npm run -- nx g @nrwl/nest:lib plugins/server

endy@endy-virtual-machine:~/Projects/current/kaufman-bot$ npm run -- nx g @nrwl/nest:lib plugins/server

> kaufman-bot@0.0.0 nx
> nx "g" "@nrwl/nest:lib" "plugins/server"

CREATE libs/plugins/server/README.md
CREATE libs/plugins/server/.babelrc
CREATE libs/plugins/server/src/index.ts
CREATE libs/plugins/server/tsconfig.json
CREATE libs/plugins/server/tsconfig.lib.json
UPDATE tsconfig.base.json
CREATE libs/plugins/server/project.json
UPDATE workspace.json
CREATE libs/plugins/server/.eslintrc.json
CREATE libs/plugins/server/jest.config.js
CREATE libs/plugins/server/tsconfig.spec.json
CREATE libs/plugins/server/src/lib/plugins-server.module.ts
Enter fullscreen mode Exit fullscreen mode

Create config interface file ./libs/plugin/server/src/lib/scraper/scraper-config/scraper.config.ts

export const SCRAPER_CONFIG = 'SCRAPER_CONFIG';

export interface ScraperConfig {
  uri: string;
  timeout?: number;
  contentSelector: string;
  contentLength?: number;
  contentCodepage?: string;
  spyWords: string[];
  removeWords?: string[];
  help: string;
  helpLocale?: { [locale: string]: string };
}
Enter fullscreen mode Exit fullscreen mode

Add enum commands for work plugin libs/plugin/server/src/lib/scraper/scraper-types/scraper-commands.ts

export enum ScraperCommandsEnum {
  help = 'help',
}
Enter fullscreen mode Exit fullscreen mode

Add plugin service without logic libs/plugins/server/src/lib/scraper/scraper-services/scraper.service.ts

import { Inject, Injectable, Logger } from '@nestjs/common';
import { Message as Msg, On, Update } from 'nestjs-telegraf';
import {
  ScraperConfig,
  SCRAPER_CONFIG,
} from '../scraper-config/scraper.config';
import { ScraperCommandsEnum } from '../scraper-types/scraper-commands';

@Update()
@Injectable()
export class ScraperService {
  private readonly logger = new Logger(ScraperService.name);

  constructor(
    @Inject(SCRAPER_CONFIG)
    private readonly scraperConfig: ScraperConfig
  ) {}

  @On('text')
  async onMessage(@Msg() msg) {
    const locale = msg.from?.language_code || null;
    const spyWord = this.scraperConfig.spyWords.find((spyWord) =>
      msg.text.toLowerCase().includes(spyWord.toLowerCase())
    );
    if (spyWord) {
      if (
        msg.text.includes(`/${ScraperCommandsEnum.help}`) ||
        msg.text.includes(ScraperCommandsEnum.help)
      ) {
        const replayHelpMessage =
          (locale && this.scraperConfig.helpLocale?.[locale]) ||
          this.scraperConfig.help;
        return replayHelpMessage;
      }

      const preparedText = msg.text
        .split(spyWord)
        .join('')
        .split('  ')
        .join('')
        .trim();

      const replayMessage = await this.scrap(preparedText);

      if (replayMessage) {
        return replayMessage;
      }

      this.logger.warn(`Unhandled commands for text: "${msg.text}"`);
      this.logger.debug(msg);
    }
    return null;
  }

  private scrap(text: string) {
    return Promise.resolve(text);
  }
}
Enter fullscreen mode Exit fullscreen mode

Update module file, add service libs/plugins/server/src/lib/scraper/scraper.module.ts

import { DynamicModule, Module } from '@nestjs/common';
import { ScraperConfig, SCRAPER_CONFIG } from './scraper-config/scraper.config';
import { ScraperService } from './scraper-services/scraper.service';

@Module({})
export class ScraperModule {
  static forRoot(config: ScraperConfig): DynamicModule {
    return {
      module: ScraperModule,
      providers: [
        {
          provide: SCRAPER_CONFIG,
          useValue: config,
        },
        ScraperService,
      ],
    };
  }
}
Enter fullscreen mode Exit fullscreen mode

Generate all need files for include lib to app

npm run generate

endy@endy-virtual-machine:~/Projects/current/kaufman-bot$ npm run generate

> kaufman-bot@0.0.0 generate
> npm run rucken -- prepare && npm run lint:fix


> kaufman-bot@0.0.0 rucken
> rucken "prepare"

[2022-02-20T15:17:21.816] [INFO] MakeTsListService: prepare - Start create list files...
[2022-02-20T15:17:21.818] [INFO] MakeTsListService: prepare - Config: {"indexFileName":"index","excludes":["*node_modules*","*public_api.ts*","*.spec*","environment*","*test*","*e2e*","*.stories.ts","*.d.ts"]}
[2022-02-20T15:17:21.819] [DEBUG] MakeTsListService: prepare - Process library "core-server" in libs/core/server/src
[2022-02-20T15:17:21.819] [DEBUG] MakeTsListService: prepare - Process library "plugins-server" in libs/plugins/server/src
[2022-02-20T15:17:21.819] [INFO] MakeTsListService: prepare - End of create list files...
[2022-02-20T15:17:21.819] [INFO] VersionUpdaterService: prepare - Start update versions...
[2022-02-20T15:17:21.820] [INFO] VersionUpdaterService: prepare - Config: {"updatePackageVersion":true}
[2022-02-20T15:17:21.820] [DEBUG] VersionUpdaterService: prepare - Process project "core-server" in libs/core/server
[2022-02-20T15:17:21.820] [INFO] VersionUpdaterService: prepare - Start for {"rootConfigPath":"/home/endy/Projects/current/kaufman-bot/package.json","appConfigPath":"libs/core/server/package.json"}
[2022-02-20T15:17:21.821] [INFO] VersionUpdaterService: prepare - End of for {"rootConfigPath":"/home/endy/Projects/current/kaufman-bot/package.json","appConfigPath":"libs/core/server/package.json"}
[2022-02-20T15:17:21.821] [DEBUG] VersionUpdaterService: prepare - Process project "plugins-server" in libs/plugins/server
[2022-02-20T15:17:21.821] [INFO] VersionUpdaterService: prepare - Start for {"rootConfigPath":"/home/endy/Projects/current/kaufman-bot/package.json","appConfigPath":"libs/plugins/server/package.json"}
[2022-02-20T15:17:21.822] [INFO] VersionUpdaterService: prepare - End of for {"rootConfigPath":"/home/endy/Projects/current/kaufman-bot/package.json","appConfigPath":"libs/plugins/server/package.json"}
[2022-02-20T15:17:21.822] [DEBUG] VersionUpdaterService: prepare - Process project "server" in apps/server
[2022-02-20T15:17:21.822] [INFO] VersionUpdaterService: prepare - Start for {"rootConfigPath":"/home/endy/Projects/current/kaufman-bot/package.json","appConfigPath":"apps/server/package.json"}
[2022-02-20T15:17:21.822] [INFO] VersionUpdaterService: prepare - Error Wrong body of file apps/server/package.json
[2022-02-20T15:17:21.823] [INFO] VersionUpdaterService: prepare - End of for {"rootConfigPath":"/home/endy/Projects/current/kaufman-bot/package.json","appConfigPath":"apps/server/package.json"}
[2022-02-20T15:17:21.823] [INFO] VersionUpdaterService: prepare - End of update versions...
[2022-02-20T15:17:21.823] [INFO] Extracti18nService: prepare - Start create translate files...
[2022-02-20T15:17:21.823] [INFO] Extracti18nService: prepare - Config: {"locales":["en"],"markers":["getText","dictionary"]}
[2022-02-20T15:17:21.823] [INFO] Extracti18nService: prepare - Process applications...
[2022-02-20T15:17:21.823] [DEBUG] Extracti18nService: prepare - server apps/server/src
[2022-02-20T15:17:22.757] [INFO] Extracti18nService: prepare - Process libraries...
[2022-02-20T15:17:22.758] [DEBUG] Extracti18nService: prepare - core-server libs/core/server/src
[2022-02-20T15:17:23.651] [DEBUG] Extracti18nService: prepare - plugins-server libs/plugins/server/src
[2022-02-20T15:17:24.987] [INFO] Extracti18nService: prepare - End of create translate files...
[2022-02-20T15:17:24.987] [INFO] GettextService: prepare - Start create translate files...
[2022-02-20T15:17:24.987] [INFO] GettextService: prepare - Config: {"po2jsonOptions":{"format":"mf"},"pattern":"**/*.@(ts|js|tsx|jsx)","locales":["en"],"defaultLocale":"en","markers":["getText","dictionary"]}
[2022-02-20T15:17:24.988] [DEBUG] GettextService: prepare - core-server libs/core/server/src
[2022-02-20T15:17:25.011] [DEBUG] GettextService: prepare - plugins-server libs/plugins/server/src
[2022-02-20T15:17:25.037] [DEBUG] GettextService: prepare - server apps/server/src
[2022-02-20T15:17:25.057] [INFO] GettextService: prepare - End of create translate files...
[2022-02-20T15:17:25.057] [INFO] Extracti18nService: prepare - Start create translate files...
[2022-02-20T15:17:25.057] [INFO] Extracti18nService: prepare - Config: {"locales":["en"],"markers":["getText","dictionary"]}
[2022-02-20T15:17:25.058] [INFO] Extracti18nService: prepare - Process applications...
[2022-02-20T15:17:25.058] [DEBUG] Extracti18nService: prepare - server apps/server/src
[2022-02-20T15:17:25.961] [INFO] Extracti18nService: prepare - Process libraries...
[2022-02-20T15:17:25.961] [DEBUG] Extracti18nService: prepare - core-server libs/core/server/src
[2022-02-20T15:17:26.716] [DEBUG] Extracti18nService: prepare - plugins-server libs/plugins/server/src
[2022-02-20T15:17:28.030] [INFO] Extracti18nService: prepare - End of create translate files...

> kaufman-bot@0.0.0 lint:fix
> npm run tsc:lint && nx workspace-lint --fix && nx run-many --target=lint --all --fix && nx format:write --all


> kaufman-bot@0.0.0 tsc:lint
> tsc --noEmit -p tsconfig.base.json


    ✔  nx run plugins-server:lint (1s)
    ✔  nx run core-server:lint (1s)
    ✔  nx run server:lint  [local cache]

 ————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————————

 >  NX   Successfully ran target lint for 3 projects (2s)

         With additional flags:
           --fix=true

   Nx read the output from the cache instead of running the command for 1 out of 3 tasks.


 >  NX   Running affected:* commands with --all can result in very slow builds.

   --all is not meant to be used for any sizable project or to be used in CI.

   Learn more about checking only what is affected: https://nx.dev/latest/angular/cli/affected#affected.

.eslintrc.json 17ms
.prettierrc 16ms
.vscode/extensions.json 2ms
apps/server/.eslintrc.json 5ms
apps/server/jest.config.js 11ms
apps/server/project.json 6ms
apps/server/src/app/app.controller.spec.ts 29ms
apps/server/src/app/app.controller.ts 8ms
apps/server/src/app/app.module.ts 9ms
apps/server/src/app/app.service.spec.ts 9ms
apps/server/src/app/app.service.ts 19ms
apps/server/src/assets/i18n/en.json 2ms
apps/server/src/assets/i18n/en.vendor.json 3ms
apps/server/src/environments/environment.prod.ts 4ms
apps/server/src/environments/environment.ts 4ms
apps/server/src/main.ts 16ms
apps/server/tsconfig.app.json 6ms
apps/server/tsconfig.json 4ms
apps/server/tsconfig.spec.json 4ms
jest.config.js 5ms
jest.preset.js 2ms
libs/core/server/.babelrc 3ms
libs/core/server/.eslintrc.json 2ms
libs/core/server/jest.config.js 5ms
libs/core/server/package.json 3ms
libs/core/server/project.json 3ms
libs/core/server/README.md 22ms
libs/core/server/src/i18n/en.json 1ms
libs/core/server/src/index.ts 3ms
libs/core/server/src/lib/core-server.module.ts 3ms
libs/core/server/tsconfig.json 3ms
libs/core/server/tsconfig.lib.json 3ms
libs/core/server/tsconfig.spec.json 3ms
libs/plugins/server/.babelrc 2ms
libs/plugins/server/.eslintrc.json 2ms
libs/plugins/server/jest.config.js 5ms
libs/plugins/server/package.json 2ms
libs/plugins/server/project.json 6ms
libs/plugins/server/README.md 6ms
libs/plugins/server/src/i18n/en.json 1ms
libs/plugins/server/src/index.ts 2ms
libs/plugins/server/src/lib/plugins-server.module.ts 7ms
libs/plugins/server/src/lib/scraper/scraper-config/scraper.config.ts 5ms
libs/plugins/server/src/lib/scraper/scraper-services/scraper.service.ts 20ms
libs/plugins/server/src/lib/scraper/scraper-types/scraper-commands.ts 3ms
libs/plugins/server/src/lib/scraper/scraper.module.ts 5ms
libs/plugins/server/tsconfig.json 3ms
libs/plugins/server/tsconfig.lib.json 3ms
libs/plugins/server/tsconfig.spec.json 3ms
migrations.json 5ms
nx.json 3ms
package-lock.json 227ms
package.json 2ms
README.md 44ms
STEPS.md 105ms
tools/tsconfig.tools.json 4ms
transloco.config.js 11ms
transloco.config.json 4ms
tsconfig.base.json 4ms
workspace.json 4ms
Enter fullscreen mode Exit fullscreen mode

Add ScraperModule to main AppModule

import { ScraperModule } from '@kaufman-bot/plugins/server';
import { Module } from '@nestjs/common';
import env from 'env-var';
import { TelegrafModule } from 'nestjs-telegraf';
import { AppController } from './app.controller';
import { AppService } from './app.service';

@Module({
  imports: [
    TelegrafModule.forRoot({
      token: env.get('TELEGRAM_BOT_TOKEN').required().asString(),
    }),
    ScraperModule.forRoot({
      contentCodepage: '',
      contentLength: 0,
      contentSelector: '',
      help: 'Scraper help message',
      spyWords: ['scraper'],
      timeout: 0,
      uri: '',
    }),
  ],
  controllers: [AppController],
  providers: [AppService],
})
export class AppModule {}
Enter fullscreen mode Exit fullscreen mode

Test from telegram

IL'shat Khamitov, [20.02.2022 18:28]
scraper /help

KaufmanBotDevelop, [20.02.2022 18:28]
Scraper help message

IL'shat Khamitov, [20.02.2022 18:29]
scraper help

KaufmanBotDevelop, [20.02.2022 18:29]
Scraper help message

IL'shat Khamitov, [20.02.2022 18:29]
scraper trim message please

KaufmanBotDevelop, [20.02.2022 18:29]
trim message please
Enter fullscreen mode Exit fullscreen mode

Image description

Add logic for scrap and use it for get rate for USD/EUR

Install all need deps

npm install --save axios cheerio html-to-text jschardet encoding charset mustache

endy@endy-virtual-machine:~/Projects/current/kaufman-bot$ npm install --save axios cheerio html-to-text jschardet encoding charset mustache

added 15 packages, and audited 883 packages in 6s

109 packages are looking for funding
  run `npm fund` for details

found 0 vulnerabilities
Enter fullscreen mode Exit fullscreen mode

Update service libs/plugins/server/src/lib/scraper/scraper-services/scraper.service.ts

import { Inject, Injectable, Logger } from '@nestjs/common';
import axios from 'axios';
import charset from 'charset';
import cheerio from 'cheerio';
import encoding from 'encoding';
import htmlToText from 'html-to-text';
import jschardet from 'jschardet';
import { render } from 'mustache';
import { Message as Msg, On, Update } from 'nestjs-telegraf';
import {
  ScraperConfig,
  SCRAPER_CONFIG,
} from '../scraper-config/scraper.config';
import { ScraperCommandsEnum } from '../scraper-types/scraper-commands';

@Update()
@Injectable()
export class ScraperService {
  private readonly logger = new Logger(ScraperService.name);

  constructor(
    @Inject(SCRAPER_CONFIG)
    private readonly scraperConfig: ScraperConfig
  ) {}

  @On('text')
  async onMessage(@Msg() msg) {
    const locale = msg.from?.language_code || null;
    const spyWord = this.scraperConfig.spyWords.find((spyWord) =>
      msg.text.toLowerCase().includes(spyWord.toLowerCase())
    );
    if (spyWord) {
      if (
        msg.text.includes(`/${ScraperCommandsEnum.help}`) ||
        msg.text.includes(ScraperCommandsEnum.help)
      ) {
        const replayHelpMessage =
          (locale && this.scraperConfig.helpLocale?.[locale]) ||
          this.scraperConfig.help;
        return replayHelpMessage;
      }

      const preparedText = msg.text
        .split(spyWord)
        .join('')
        .split('  ')
        .join('')
        .trim();

      const replayMessage = await this.scrap(locale, preparedText);

      if (replayMessage) {
        return replayMessage;
      }

      this.logger.warn(`Unhandled commands for text: "${msg.text}"`);
      this.logger.debug(msg);
    }
    return null;
  }

  private async scrap(locale: string, text: string) {
    /*const parsedVariables = parse(this.scraperConfig.uri)
      .filter((arr) => arr[0] === 'name')
      .map((arr) => arr[1]);
    const otherText = text;*/

    const replaceVariables = { text: encodeURIComponent(text.trim()), locale };
    (this.scraperConfig.removeWords || []).forEach((removeWord: string) => {
      text = text
        .replace(new RegExp(removeWord, 'ig'), '')
        .replace(new RegExp(' {2}', 'ig'), ' ')
        .trim();
    });
    const textArray = text.split(' ');
    if (textArray.length > 0) {
      textArray.forEach((textArrayItem: string, textArrayIndex: number) => {
        replaceVariables[`text ${textArrayIndex + 1}`] = textArrayItem;
      });
      textArray.forEach((textArrayItem: string, textArrayIndex: number) => {
        replaceVariables[`text${textArrayIndex + 1}`] =
          textArrayItem.toLowerCase();
      });
      textArray.forEach((textArrayItem: string, textArrayIndex: number) => {
        replaceVariables[`TEXT${textArrayIndex + 1}`] =
          textArrayItem.toUpperCase();
      });
    }
    const repalcedUri = render(this.scraperConfig.uri, replaceVariables);
    // const replacedText = render(text, replaceVariables);

    const axiosInstance = axios.create({
      timeout: this.scraperConfig.timeout,
      responseEncoding: this.scraperConfig.contentCodepage || 'binary',
    });

    try {
      const response = await axiosInstance.get(repalcedUri);
      const $ = cheerio.load(response.data);
      let content = this.scraperConfig.contentSelector
        .split(',')
        .map((selector: string) => htmlToText.fromString($(selector).html()))
        .join('\n\n');

      const enc =
        charset(response.headers, response.data) ||
        jschardet.detect(response.data).encoding.toLowerCase();

      if (enc !== 'utf8') {
        content = encoding
          .convert(Buffer.from(content, 'binary'), 'utf8', enc, true)
          .toString('utf8');
      }
      return content;
    } catch (err) {
      this.logger.error(err, err.stack);
      return err.toString();
    }
  }
}
Enter fullscreen mode Exit fullscreen mode

Update app module

import { ScraperModule } from '@kaufman-bot/plugins/server';
import { Module } from '@nestjs/common';
import env from 'env-var';
import { TelegrafModule } from 'nestjs-telegraf';
import { AppController } from './app.controller';
import { AppService } from './app.service';

@Module({
  imports: [
    TelegrafModule.forRoot({
      token: env.get('TELEGRAM_BOT_TOKEN').required().asString(),
    }),
    ScraperModule.forRoot({
      contentSelector:
        '#__next > div:nth-child(2) > div.fluid-container__BaseFluidContainer-qoidzu-0.gJBOzk > section > div:nth-child(2) > div > main > form > div:nth-child(2) > div:nth-child(1) > p.result__BigRate-sc-1bsijpp-1.iGrAod',
      help: 'Scraper help message',
      spyWords: ['scraper'],
      removeWords: ['to', 'convert', 'please'],
      uri: 'https://www.xe.com/currencyconverter/convert/?Amount={{TEXT1}}&From={{TEXT2}}&To={{TEXT3}}',
    }),
  ],
  controllers: [AppController],
  providers: [AppService],
})
export class AppModule {}
Enter fullscreen mode Exit fullscreen mode

Test from telegram

Image description

Top comments (0)