All pages
Powered by GitBook
1 of 1

Loading...

Crawl

Start a crawl job

post
Authorizations
Body
url · string · Required
maxPages · integer · min: 1 · Optional
followLinks · boolean · Optional · Default: true
ignoreSitemap · boolean · Optional · Default: false
excludePatterns · string[] · Optional
includePatterns · string[] · Optional
Responses
200
Crawl job started successfully
application/json
400
Invalid request parameters
application/json
500
Server error
application/json
post
POST /api/crawl HTTP/1.1
Host: api.hyperbrowser.ai
x-api-key: YOUR_API_KEY
Content-Type: application/json
Accept: */*
Content-Length: 1045

{
  "url": "text",
  "maxPages": 1,
  "followLinks": true,
  "ignoreSitemap": false,
  "excludePatterns": [
    "text"
  ],
  "includePatterns": [
    "text"
  ],
  "sessionOptions": {
    "useStealth": false,
    "useProxy": false,
    "proxyServer": "text",
    "proxyServerPassword": "text",
    "proxyServerUsername": "text",
    "proxyCountry": "AD",
    "proxyState": "AL",
    "proxyCity": "new york",
    "operatingSystems": [
      "windows"
    ],
    "device": [
      "desktop"
    ],
    "platform": [
      "chrome"
    ],
    "locales": [
      "aa"
    ],
    "screen": {
      "width": 1280,
      "height": 720
    },
    "solveCaptchas": false,
    "adblock": false,
    "trackers": false,
    "annoyances": false,
    "enableWebRecording": true,
    "enableVideoWebRecording": false,
    "profile": {
      "id": "text",
      "persistChanges": true
    },
    "acceptCookies": true,
    "extensionIds": [
      "123e4567-e89b-12d3-a456-426614174000"
    ],
    "urlBlocklist": [
      "text"
    ],
    "browserArgs": [
      "text"
    ],
    "imageCaptchaParams": [
      {
        "imageSelector": "text",
        "inputSelector": "text"
      }
    ],
    "timeoutMinutes": 1
  },
  "scrapeOptions": {
    "formats": [
      "html"
    ],
    "includeTags": [
      "text"
    ],
    "excludeTags": [
      "text"
    ],
    "onlyMainContent": true,
    "waitFor": 0,
    "timeout": 30000,
    "waitUntil": "load",
    "screenshotOptions": {
      "fullPage": false,
      "format": "webp"
    }
  }
}

Example response (200):
{
  "jobId": "text"
}

Get crawl job status

get
Authorizations
Path parameters
id · string · uuid · Required
Responses
200
Crawl job status
application/json
404
Crawl job not found
application/json
500
Server error
application/json
get
GET /api/crawl/{id}/status HTTP/1.1
Host: api.hyperbrowser.ai
x-api-key: YOUR_API_KEY
Accept: */*

Example response (200):
{
  "status": "pending"
}

Get crawl job status and results

get
Authorizations
Path parameters
id · string · Required
Query parameters
page · integer · Optional
batchSize · integer · min: 1 · Optional
Responses
200
Crawl job details retrieved successfully
application/json
404
Crawl job not found
application/json
500
Server error
application/json
get
GET /api/crawl/{id} HTTP/1.1
Host: api.hyperbrowser.ai
x-api-key: YOUR_API_KEY
Accept: */*

Example response (200):
{
  "jobId": "123e4567-e89b-12d3-a456-426614174000",
  "status": "pending",
  "error": "text",
  "totalCrawledPages": 1,
  "totalPageBatches": 1,
  "currentPageBatch": 1,
  "batchSize": 1,
  "data": [
    {
      "url": "text",
      "status": "completed",
      "error": "text",
      "metadata": {
        "ANY_ADDITIONAL_PROPERTY": "text"
      },
      "markdown": "text",
      "html": "text",
      "links": [
        "text"
      ],
      "screenshot": "text"
    }
  ]
}