REST API

BATCH

Overview

If you have a news item, article or RSS page and need to extract tags from it, this API returns the processed contents (tags) of the specified page. To use it, construct an API request by following a few simple rules.

Base URL

http://demo.tagsreaper.com/TagsReaperUI/api

Methods

The HTTP GET and POST methods are supported. Possible URL parameters are: ‘json’, ‘cmd’ and ‘apiToken’. The GET method requires url-encoded values.

URL request parameters

All parameters are required.

cmd: for BATCH API this parameter is ‘0’ (zero)

apiToken: the token value for API requests. You can generate it from your Profile if you are already registered at admin.tagsreaper.com

json: generally for URL it looks like this:

{
    "id": 1,
    "crawlerType": 4,
    "maxIterations": "1",
    "items": [
        {
            "siteId": "0",
            "urlContentResponse": null,
            "siteObj": {
                "fetchType": 1,
                "id": "0",
                "uDate": null,
                "tcDate": null,
                "cDate": null,
                "resources": null,
                "iterations": null,
                "description": null,
                "urls": [
                ],
                "filters": [
                ],
                "properties": {
                },
                "state": null,
                "priority": null,
                "maxURLs": null,
                "maxResources": null,
                "maxErrors": null,
                "maxResourceSize": null,
                "requestDelay": null,
                "httpTimeout": null,
                "errorMask": null,
                "errors": null,
                "urlType": null,
                "contents": null,
                "processingDelay": null,
                "size": null,
                "avgSpeed": null,
                "avgSpeedCounter": null,
                "userId": null,
                "recrawlPeriod": null,
                "recrawlDate": null,
                "maxURLsFromPage": null,
                "collectedURLs": null
            },
            "urlObj": {
                "status": 2,
                "linksI": 0,
                "linksE": 0,
                "contentMask": 0,
                "processingTime": 0,
                "CDate": null,
                "mRateCounter": 0,
                "httpTimeout": 10000,
                "size": 0,
                "urlPut": null,
                "batchId": 0,
                "lastModified": null,
                "tagsCount": 0,
                "mRate": 0,
                "charset": "",
                "state": 0,
                "httpCode": 0,
                "priority": 0,
                "maxURLsFromPage": null,
                "processingDelay": 0,
                "crawlingTime": 0,
                "type": 1,
                "processed": 0,
                "totalTime": 0,
                "siteSelect": 0,
                "contentType": "",
                "pDate": null,
                "errorMask": 0,
                "httpMethod": "get",
                "eTag": "",
                "siteId": "0",
                "freq": 0,
                "tcDate": null,
                "rawContentMd5": "",
                "crawled": 0,
                "UDate": null,
                "contentURLMd5": "",
                "requestDelay": 0,
                "depth": 0,
                "parentMd5": "",
                "urlUpdate": null,
                "tagsMask": 0,
                "urlMd5": "b7632cc979f402538f516e48379f9101",
                "url": "http:\/\/www.theguardian.com\/law\/2015\/may\/15\/eurosceptic-david-davis-could-oppose-government-on-human-rights-reform"
            },
            "urlPutObj": {
                "putDict": {
                },
                "urlMd5": "b7632cc979f402538f516e48379f9101",
                "contentType": 0,
                "siteId": "0",
                "fileStorageSuffix": null,
                "criterions": null
            },
            "properties": {
                "DB_TASK_MODE": "RO",
                "HTTP_REDIRECTS_MAX": 5,
                "HTML_REDIRECTS_MAX": 5,
                "HTML_RECOVER": "0",
                "ROBOTS_MODE": "0",
                "PROCESSOR_PROPERTIES": "{\"algorithm\":{\"algorithm_name\":\"user_name_algorithm\"},\"modules\":{\"user_name_algorithm\":[\"ScrapyExtractor\",\"GooseExtractor\",\"NewspaperExtractor\"]}}",
                "template": {
                    "templates": [
                        {
                            "output_format": {
                                "type": "news",
                                "name": "json",
                                "header": "[\n",
                                "items_header": "",
                                "item": "{\n\"pubdate\":\"%pubdate%\",\n\"title\":\"%title%\",\n\"description\":\"%description%\",\n\"media\":\"%media%\",\n\"author\":\"%author%\",\n\"dc_date\":\"%dc_date%\",\n\"link\":\"%link%\",\n\"keywords\":\"%keywords%\",\n\"content_encoded\":\"%content_encoded%\",\n\"html_lang\":\"%html_lang%\",\n\"pubdate_extractor\":\"%pubdate_extractor%\",\n\"title_extractor\":\"%title_extractor%\",\n\"description_extractor\":\"%description_extractor%\",\n\"media_extractor\":\"%media_extractor%\",\n\"author_extractor\":\"%author_extractor%\",\n\"dc_date_extractor\":\"%dc_date_extractor%\",\n\"link_extractor\":\"%link_extractor%\",\n\"keywords_extractor\":\"%keywords_extractor%\",\n\"content_encoded_extractor\":\"%content_encoded_extractor%\",\n\"html_lang_extractor\":\"%html_lang_extractor%\",\n\"crawler_time\":\"%crawler_time%\",\n\"scraper_time\":\"%scraper_time%\",\n\"errors_mask\":\"%errors_mask%\"\n}\n",
                                "items_footer": "",
                                "footer": "]\n"
                            },
                            "tags": {
                                "pubdate": [
                                ],
                                "title": [
                                ],
                                "description": [
                                ],
                                "media": [
                                ],
                                "author": [
                                ],
                                "dc_date": [
                                ],
                                "link": [
                                ],
                                "keywords": [
                                ],
                                "content_encoded": [
                                ],
                                "html_lang": [
                                ],
                                "pubdate_extractor": [
                                ],
                                "title_extractor": [
                                ],
                                "description_extractor": [
                                ],
                                "media_extractor": [
                                ],
                                "author_extractor": [
                                ],
                                "dc_date_extractor": [
                                ],
                                "link_extractor": [
                                ],
                                "keywords_extractor": [
                                ],
                                "content_encoded_extractor": [
                                ],
                                "html_lang_extractor": [
                                ],
                                "crawler_time": [
                                ],
                                "scraper_time": [
                                ],
                                "errors_mask": [
                                ]
                            },
                            "priority": 100,
                            "mandatory": 1,
                            "is_filled": 0
                        }
                    ],
                    "select": "first_nonempty"
                }
            },
            "urlId": "b7632cc979f402538f516e48379f9101"
        }
    ]
}

Just change the ‘url’ field to try another page. Note that the json needs to be URL-encoded. Here we will consider the detailed json structure and contents.

Request example (after URL-encoding the json)

example

If you open it in browser – you will get the response:

[
{
    "errorCode": 0,
    "errorMessage": "",
    "itemsList": [
        {
            "errorCode": 0,
            "errorMessage": "",
            "host": "localhost",
            "id": 556867002,
            "itemObject": [
                {
                    "contentURLMd5": "",
                    "cookies": [],
                    "dbFields": {
                        "BatchId": 1,
                        "CDate": null,
                        "Charset": "utf-8",
                        "ContentType": "text/html",
                        "ContentURLMd5": "",
                        "Crawled": 1,
                        "CrawlingTime": 63,
                        "ErrorMask": 0,
                        "HttpCode": 200,
                        "LastModified": null,
                        "LinksE": 0,
                        "LinksI": 0,
                        "PDate": "2015-05-15 07:00:08",
                        "Processed": 0,
                        "ProcessingTime": 0,
                        "RawContentMd5": "",
                        "Size": 412838,
                        "Status": 4,
                        "TagsCount": 8,
                        "TagsMask": 1470,
                        "UDate": null
                    },
                    "headers": [],
                    "meta": [],
                    "processedContents": [
                        {
                            "buffer": "Wwp7CiJwdWJkYXRlIjoiMjAxNS0wNS0xNSAwNzowMDowOCIsCiJ0aXRsZSI6IkV1cm9zY2VwdGljIERhdmlkIERhdmlzIGNvdWxkIG9wcG9zZSBnb3Zlcm5tZW50IG9uIGh1bWFuIHJpZ2h0cyByZWZvcm0iLAoiZGVzY3JpcHRpb24iOiJUb3J5IE1Q4oCZcyBjb21tZW50cyBzaG93IGdyb3dpbmcgYmFja2JlbmNoIHJlYmVsbGlvbiBvdmVyIHBsYW4gdGhhdCBjb3VsZCBsZWFkIHRvIHdpdGhkcmF3YWwgZnJvbSBFdXJvcGVhbiBjb3VydCBvZiBodW1hbiByaWdodHMiLAoibWVkaWEiOiIlbWVkaWElIiwKImF1dGhvciI6Ik93ZW4gQm93Y290dCIsCiJkY19kYXRlIjoiMDcuMDDCoEJTVCIsCiJsaW5rIjoiaHR0cDovL3d3dy50aGVndWFyZGlhbi5jb20vbGF3LzIwMTUvbWF5LzE1L2V1cm9zY2VwdGljLWRhdmlkLWRhdmlzLWNvdWxkLW9wcG9zZS1nb3Zlcm5tZW50LW9uLWh1bWFuLXJpZ2h0cy1yZWZvcm0iLAoia2V5d29yZHMiOiJFdXJvcGVhbiBjb3VydCBvZiBodW1hbiByaWdodHMsSHVtYW4gUmlnaHRzIEFjdCxIdW1hbiByaWdodHMsVUsgYmlsbCBvZiByaWdodHMsS2VubmV0aCBDbGFya2UsQ29uc2VydmF0aXZlcyxMYXcsVUsgbmV3cyxEYXZpZCBEYXZpcyxEb21pbmljIEdyaWV2ZSxQb2xpdGljcyBFdXJvcGVhbiBjb3VydCBvZiBodW1hbiByaWdodHMsSHVtYW4gUmlnaHRzIEFjdCxIdW1hbiByaWdodHMsVUsgYmlsbCBvZiByaWdodHMsS2VubmV0aCBDbGFya2UsQ29uc2VydmF0aXZlcyxMYXcsVUsgbmV3cyxEYXZpZCBEYXZpcyxEb21pbmljIEdyaWV2ZSIsCiJjb250ZW50X2VuY29kZWQiOiJPd2VuIEJvd2NvdHQgTGVnYWwgYWZmYWlycyBjb3JyZXNwb25kZW50IEZyaWRheSAxNSBNYXkgMjAxNSAwNy4wMMKgQlNUIExhc3QgbW9kaWZpZWQgb24gRnJpZGF5IDE1IE1heSAyMDE1IDEyLjIxwqBCU1QgVGhlIENvbnNlcnZhdGl2ZSBNUCBEYXZpZCBEYXZpcywgYSBwcm9taW5lbnQgRXVyb3NjZXB0aWMsIGhhcyB0aHJlYXRlbmVkIHRvIG9wcG9zZSBnb3Zlcm5tZW50IHByb3Bvc2FscyB0aGF0IGNvdWxkIGxlYWQgdG8gdGhlIFVLIHdpdGhkcmF3aW5nIGZyb20gdGhlIEV1cm9wZWFuIGNvdXJ0IG9mIGh1bWFuIHJpZ2h0cyAuIFJlbGF0ZWQ6IEh1bWFuIFJpZ2h0cyBBY3QgYWJvbGl0aW9uIHdpbGwgYmUgbm8gJ3F1aWNrIHdpbicgZm9yIHRoZSBUb3JpZXMgRGF2aXPigJlzIHJlcG9ydGVkIGNvbW1lbnRzIGFyZSBhIHNpZ24gb2YgZ3Jvd2luZyByZWJlbGxpb24gb24gdGhlIFRvcnkgYmFja2JlbmNoZXMgYXMgdGhlIGNvbXBsZXhpdHkgYW5kIHBvbGl0aWNhbCBkaWZmaWN1bHRpZXMgaW52b2x2ZWQgaW4gc2VjZWRpbmcgZnJvbSB0aGUganVkaWNpYWwgYXV0aG9yaXR5IG9mIHRoZSBTdHJhc2JvdXJnIGNvdXJ0IGJlY29tZSBpbmNyZWFzaW5nbHkgYXBwYXJlbnQgdG8gdGhlIGdvdmVybm1lbnQuIFRoZSBmb3JtZXIganVzdGljZSBtaW5pc3RlciBLZW4gQ2xhcmtlIGFuZCBmb3JtZXIgY
XR0b3JuZXkgZ2VuZXJhbCBEb21pbmljIEdyaWV2ZSBRQyDigJMgYm90aCByZS1lbGVjdGVkIHRvIHRoZSBDb21tb25zIGxhc3Qgd2VlayDigJMgaGF2ZSBpbiB0aGUgcGFzdCB3YXJuZWQgYWJvdXQgdGhlIGRhbmdlciBvZiBkZWZ5aW5nIGRlY2lzaW9ucyBoYW5kZWQgZG93biBieSBFQ0hSIGp1ZGdlcyBvbiB0aGUgZ3JvdW5kcyB0aGF0IGl0IHdvdWxkIHVuZGVybWluZSByZXNwZWN0IGZvciB0aGUgcnVsZSBvZiBsYXcgYWNyb3NzIEV1cm9wZS4gRGF2aXMsIHRoZSBNUCBmb3IgSGFsdGVtcHJpY2UgYW5kIEhvd2RlbiwgdG9sZCBoaXMgbG9jYWwgcGFwZXIsIHRoZSBIdWxsIERhaWx5IE1haWwgOiDigJxJ4oCZbSBhZnJhaWQgd2Ugd2lsbCBjb21lIGludG8gY29uZmxpY3Qgd2l0aCB0aGUgRXVyb3BlYW4gY291cnQgYW5kIEkgZG9u4oCZdCB3YW50IHVzIHRvIGxlYXZlIGl0LiBJZiB3ZSBsZWF2ZSwgaXTigJlzIGFuIGV4Y3VzZSBmb3IgZXZlcnlvbmUgZWxzZSB0byBsZWF2ZS4gU28gSSB0aGluayB0aGF0IGNvdWxkIGJlIHF1aXRlIGFuIGludGVyZXN0aW5nIGFyZ3VtZW50LCBjb21lIHRoZSBkYXkuIEkgdGhpbmsgaXQgaXMgbW9yZSBsaWtlbHkgdGhlcmUgd2lsbCBiZSBhbiBhcmd1bWVudCBvdmVyIHRoYXQgdGhhbiBvdmVyIEV1cm9wZS7igJ0gTGlrZSBDbGFya2UgYW5kIEdyaWV2ZSwgRGF2aXMgc2F5cyBoZSBpcyBpbiBmYXZvdXIgb2YgcmVmb3JtIGJ1dCBvcHBvc2VzIHVuaWxhdGVyYWwgd2l0aGRyYXdhbCBmcm9tIHRoZSBFQ0hSIOKAkyBvbmUgb2YgdGhlIGxpa2VseSBjb25zZXF1ZW5jZXMgb2YgdGhlIHBhcnR54oCZcyBkcmFmdCBiaWxsIG9mIHJpZ2h0cy4gU2VwYXJhdGVseSwgaW4gYW4gb3BlbiBsZXR0ZXIgdG8gdGhlIHByaW1lIG1pbmlzdGVyIHRoZSBJbnRlcm5hdGlvbmFsIEJhciBBc3NvY2lhdGlvbuKAmXMgSHVtYW4gUmlnaHRzIEluc3RpdHV0ZSBwb2ludHMgb3V0IHRoYXQgVG9yeSBwcm9wb3NhbHMgZm9yIGEgQnJpdGlzaCBiaWxsIG9mIHJpZ2h0cyB3aWxsIGxpbWl0IHRoZSBhcHBsaWNhdGlvbiBvZiBodW1hbiByaWdodHMgbGF3cyB0byB0aGUg4oCcbW9zdCBzZXJpb3VzIGNhc2Vz4oCdIGFuZCBleGNsdWRlIHRob3NlIOKAnHdobyBkbyBub3QgZnVsZmlsIHRoZWlyIHJlc3BvbnNpYmlsaXRpZXMgaW4gc29jaWV0eeKAnSAuIFRoZWlyIGludGVydmVudGlvbiBhZGRzIHRvIGEgZ3Jvd2luZyBsaXN0IG9mIHJpZ2h0cyBncm91cHMgb3Bwb3NpbmcgdGhlIG1vdmUuIERhdmlzLCBhIGZvcm1lciBzaGFkb3cgaG9tZSBzZWNyZXRhcnksIGhhcyBpbiB0aGUgcGFzdCBiZWVuIGEgdHJlbmNoYW50IGNyaXRpYyBvZiB0aGUgSHVtYW4gUmlnaHRzIEFjdCAuIFdpdGggYSBtYWpvcml0eSBvZiBvbmx5IDEyLCBEYXZpZCBDYW1lcm9u4oCZcyByb29tIGZvciBwYXJsaWFtZW50YXJ5IG1hbm9ldXZyZSBjb3VsZCBlYXNpbHkgYmUgcmVzdHJpY3RlZCBieSBhIHJlbGF0aXZlbHkgc21hbGwgbnVtYmVyIG9mIHJlYmVscy4gVGhlI
G5ldyB0ZWFtIG9mIG1pbmlzdGVycyBhdCB0aGUgTWluaXN0cnkgb2YgSnVzdGljZSBzaSBjb25zaWRlcmluZyBpdHMgb3B0aW9ucyBhbmQgdGhlIGRyYWZ0IGJpbGwgb2YgcmlnaHRzIGRyYXduIHVwIGJ5IE1hcnRpbiBIb3dlIFFDIGFuZCBvdGhlciBsZWdhbCBhZHZpc2Vycy4gUmVsYXRlZDogVGhlIGFyZ3VtZW50cyBhZ2FpbnN0IHRoZSBIdW1hbiBSaWdodHMgQWN0IGFyZSBjb21pbmcuIFRoZXkgd2lsbCBiZSBmYWxzZSB8IEtlaXIgU3Rhcm1lciBUaGUgNDAtcGFnZSBkb2N1bWVudCBpcyB1bmRlcnN0b29kIHRvIGJlIG9uIGl0cyBzZXZlbnRoIGRyYWZ0IOKAkyBhIHJlZmxlY3Rpb24gb2YgaG93IG11Y2ggZWZmb3J0IHdpbGwgYmUgcmVxdWlyZWQgdG8gcmVjb25jaWxlIGFsbCB0aGUgY29uZmxpY3RpbmcgaW50ZXJlc3RzLiBBIHNpZ25pZmljYW50IHByb3BvcnRpb24gb2YgdGhlIHRleHQgaXMgdW5kZXJzdG9vZCB0byBjb25zaXN0IG9mIHNlY3Rpb25zIG9mIHRoZSBFdXJvcGVhbiBjb252ZW50aW9uIG9uIGh1bWFuIHJpZ2h0cywgd2hpY2ggaXQgaXMgaW50ZW5kZWQgd2lsbCBiZSBpbmNvcnBvcmF0ZWQgaW50byBVSyBsYXcuIEJlY2F1c2UgdGhlIGRyYWZ0IGJpbGwgd2FzIGEgQ29uc2VydmF0aXZlIHBhcnR5IGRvY3VtZW50IGFuZCBub3QgYSBwcm9kdWN0IG9mIHRoZSBjb2FsaXRpb24gZ292ZXJubWVudCwgaXQgY291bGQgbm90IGJlIHdvcmtlZCBvbiBieSBjaXZpbCBzZXJ2YW50cyBvciBvZmZpY2lhbCBwYXJsaWFtZW50YXJ5IGRyYXVnaHRzbWVuIGJlZm9yZSB0aGUgZWxlY3Rpb24uIFRoZSBCcml0aXNoIGJpbGwgb2YgcmlnaHRzLCBhcyBpdCBpcyBrbm93biwgd291bGQsIGFjY29yZGluZyB0byB0aGUgVG9yeSBtYW5pZmVzdG8sIOKAnGJyZWFrIHRoZSBmb3JtYWwgbGluayBiZXR3ZWVuIEJyaXRpc2ggY291cnRzIGFuZCB0aGUgRXVyb3BlYW4gY291cnQgb2YgaHVtYW4gcmlnaHRz4oCdLiBKdWRnbWVudHMgZnJvbSBTdHJhc2JvdXJnIHdvdWxkLCBpbiBlZmZlY3QsIGJlY29tZSBhZHZpc29yeSBhbmQgdGhlIFVL4oCZcyBzdXByZW1lIGNvdXJ0IHdpbGwgYmVjb21lIHN1cHJlbWUuIFRoZSBuZXcganVzdGljZSBzZWNyZXRhcnksIE1pY2hhZWwgR292ZSwgd2hvIGxpa2UgaGlzIHByZWRlY2Vzc29yLCBDaHJpcyBHcmF5bGluZywgaXMgbm90IGEgbGF3eWVyLCB3aWxsIGJyaW5nIGEgZnJlc2ggbWluZCB0byB0aGUgaW50ZXJuYXRpb25hbCBjb25zZXF1ZW5jZXMgb2YgZGVmeWluZyBTdHJhc2JvdXJn4oCZcyBqdWRnbWVudHMuIFRoZSBuZXdseSBhcHBvaW50ZWQganVzdGljZSBtaW5pc3RlciwgRG9taW5pYyBSYWFiLCB3aG8gaGFzIHJlc3BvbnNpYmlsaXR5IGZvciBodW1hbiByaWdodHMgYW5kIGNpdmlsIGxpYmVydGllcywgd2lsbCBiZSBkaXJlY3RseSBpbiBjaGFyZ2Ugb2YgcGlsb3RpbmcgdGhlIGxlZ2lzbGF0aW9uIHRocm91Z2ggcGFybGlhbWVudC4gTG9yZCBGYXVsa3MsIHdobyBoYXMgYmVlbiByZWFwcG9pbnRlZ
CBhcyBhIGp1c3RpY2UgbWluaXN0ZXIgYW5kIHdobyBhbHNvIHN1cHBvcnRzIHRoZSBwcm9wb3NhbHMsIHdpbGwgaGF2ZSB0aGUgY2hhbGxlbmdpbmcgdGFzayBvZiBwcm9wZWxsaW5nIGl0IHRocm91Z2ggYSBIb3VzZSBvZiBMb3JkcyBwYWNrZWQgd2l0aCBMYWJvdXIgYW5kIExpYmVyYWwgRGVtb2NyYXQgb3Bwb25lbnRzLiBUaGUgTWluaXN0cnkgb2YgSnVzdGljZSBzYWlkOiDigJxUaGUgZ292ZXJubWVudCB3YXMgZWxlY3RlZCB3aXRoIGEgbWFuaWZlc3RvIGNvbW1pdG1lbnQgdG8gcmVwbGFjZSB0aGUgSHVtYW4gUmlnaHRzIEFjdCB3aXRoIGEgQnJpdGlzaCBiaWxsIG9mIHJpZ2h0cy4gTWluaXN0ZXJzIHdpbGwgYmUgZGlzY3Vzc2luZyB0aGVpciBwbGFucyBvbiB0aGlzIGFuZCBtYWtpbmcgYW5ub3VuY2VtZW50cyBpbiBkdWUgY291cnNlLuKAnSBUaGUgcGxhbnMgYXJlIGV4cGVjdGVkIHRvIGJlIG1lbnRpb25lZCBpbiB0aGUgUXVlZW7igJlzIHNwZWVjaCBidXQgYSBmaW5hbCBiaWxsIG1heSBub3QgYmUgcHV0IGJlZm9yZSBNUHMgdW50aWwgbGF0ZXIgdGhpcyBwYXJsaWFtZW50LiBPbmUgb2YgdGhlIGRpbGVtbWFzIG1pbmlzdGVycyB3aWxsIGhhdmUgdG8gY29uc2lkZXIgaXMgd2hldGhlciB0byBpbmNvcnBvcmF0ZSB0aGUgRXVyb3BlYW4gY29udmVudGlvbiBpbnRvIFVLIGxhdyBvciBlbWJhcmsgb24gYW4gZXZlbiBtb3JlIGNvbXBsZXggcHJvY2VzcyBvZiBkcmF3aW5nIHVwIGFuIGVudGlyZWx5IGRpZmZlcmVudCBzZXQgb2YgcmlnaHRzLiBUaGUgb3BlbiBsZXR0ZXIgdG8gQ2FtZXJvbiBmcm9tIHRoZSBJbnRlcm5hdGlvbmFsIEJhciBBc3NvY2lhdGlvbuKAmXMgSHVtYW4gUmlnaHRzIEluc3RpdHV0ZSBzYXlzOiDigJxXZSB3b3VsZCBsaWtlIHRvIHJlbWluZCB5b3Ugb2YgdGhlIHVuaXZlcnNhbGl0eSB3aXRoIHdoaWNoIGh1bWFuIHJpZ2h0cyBsYXcgaXMgZW5zaHJpbmVkIGluIGludGVybmF0aW9uYWwgYWdyZWVtZW50cywgdG8gd2hpY2ggdGhlIFVLIGhhcyBhbHdheXMgYmVlbiBhIHdpbGxpbmcgcGFydC4gSHVtYW4gcmlnaHRzIGNhbiBvbmx5IGJlIGFuIGVmZmVjdGl2ZSBtZWNoYW5pc20gZm9yIHByb3RlY3Rpb24gaWYgdGhleSBhcHBseSB0byBhbGwgcGVvcGxlIGluIGFsbCBjYXNlcy7igJ0gU2lnbmVkIGJ5IHRoZSBvcmdhbmlzYXRpb27igJlzIGNvLWNoYWlycywgQmFyb25lc3MgSGVsZW5hIEtlbm5lZHkgUUMgYW5kIGFtYmFzc2Fkb3IgSGFucyBDb3JlbGwsIGEgU3dlZGlzaCBkaXBsb21hdCBhbmQgZm9ybWVyIHVuZGVyLXNlY3JldGFyeSBnZW5lcmFsIGF0IHRoZSBVbml0ZWQgTmF0aW9ucywgdGhlIGFwcGVhbCByZW1pbmRzIHRoZSBwcmltZSBtaW5pc3RlciB0aGF0IEV1cm9wZSBoYXMganVzdCBjZWxlYnJhdGVkIHRoZSBhbm5pdmVyc2FyeSBvZiBWRSBEYXkgdGhlIGJlZ2lubmluZyBvZiBhIHJlY29uY2lsaWF0aW9uIHByb2Nlc3MgdGhhdCBjdWxtaW5hdGVkIGluIHRoZSBlc3RhYmxpc2htZW50IG9mIHRoZ
SB0aGUgRXVyb3BlYW4gY291cnQgb2YgaHVtYW4gcmlnaHRzIGluIFN0cmFzYm91cmcuIOKAnEh1bWFuIHJpZ2h0cyBjYW4gb25seSBiZSBhbiBlZmZlY3RpdmUgbWVjaGFuaXNtIGZvciBwcm90ZWN0aW9uIGlmIHRoZXkgYXBwbHkgdG8gYWxsIHBlb3BsZSBpbiBhbGwgY2FzZXMs4oCdIHRoZSBvcGVuIGxldHRlciBjb250aW51ZXMuIOKAnE5vIGRpZmZlcmVudGlhdGlvbiBjYW4gb3Igc2hvdWxkIGJlIG1hZGUgYmV0d2VlbiDigJhzZXJpb3Vz4oCZIG9yIOKAmHRyaXZpYWzigJkgY2FzZXMgYW5kIHRob3NlIHdobyBoYXZlIG9yIGhhdmUgbm90IGZ1bGZpbGxlZCBwYXJ0aWN1bGFyIHJlcXVpcmVtZW50cy4gVG8gbWFrZSBzdWNoIGRpZmZlcmVudGlhdGlvbnMgd2l0aGluIGFueSBuZXcgYmlsbCBvZiByaWdodHMgd291bGQgYmUgdG8gcmVtb3ZlIHRoZSBlZmZlY3RpdmVuZXNzIG9mIGFueSBodW1hbiByaWdodHMgbGF3cyBpbiBwcm90ZWN0aW5nIGFsbCBwZW9wbGUsIGluY2x1ZGluZyBtYXJnaW5hbGlzZWQgb3IgdnVsbmVyYWJsZSBpbmRpdmlkdWFscy4gVGhpcyB3b3VsZCBub3QgYmUgbGVhZGVyc2hpcCwgYnV0IGl0cyB2ZXJ5IGFudGl0aGVzaXMu4oCdIiwKImh0bWxfbGFuZyI6IiVodG1sX2xhbmclIiwKInB1YmRhdGVfZXh0cmFjdG9yIjoiU2NyYXB5RXh0cmFjdG9yIiwKInRpdGxlX2V4dHJhY3RvciI6Ikdvb3NlRXh0cmFjdG9yIiwKImRlc2NyaXB0aW9uX2V4dHJhY3RvciI6IlNjcmFweUV4dHJhY3RvciIsCiJtZWRpYV9leHRyYWN0b3IiOiIlbWVkaWFfZXh0cmFjdG9yJSIsCiJhdXRob3JfZXh0cmFjdG9yIjoiU2NyYXB5RXh0cmFjdG9yIiwKImRjX2RhdGVfZXh0cmFjdG9yIjoiU2NyYXB5RXh0cmFjdG9yIiwKImxpbmtfZXh0cmFjdG9yIjoiU2NyYXB5RXh0cmFjdG9yIiwKImtleXdvcmRzX2V4dHJhY3RvciI6IlNjcmFweUV4dHJhY3RvciIsCiJjb250ZW50X2VuY29kZWRfZXh0cmFjdG9yIjoiU2NyYXB5RXh0cmFjdG9yIiwKImh0bWxfbGFuZ19leHRyYWN0b3IiOiIlaHRtbF9sYW5nX2V4dHJhY3RvciUiLAoiY3Jhd2xlcl90aW1lIjoiMC4wNjMiLAoic2NyYXBlcl90aW1lIjoiMC40MzM2MTAyMDA4ODIiLAoiZXJyb3JzX21hc2siOiIwIgp9Cl0K",
                            "cDate": "2015-11-03 15:26:19",
                            "typeId": 10
                        }
                    ],
                    "rawContentMd5": "",
                    "rawContents": [],
                    "requests": [],
                    "siteId": 0,
                    "status": 7,
                    "url": "http://www.theguardian.com/law/2015/may/15/eurosceptic-david-davis-could-oppose-government-on-human-rights-reform",
                    "urlMd5": null
                }
            ],
            "node": "r015_data",
            "port": "5734",
            "time": 1064
        }
     ]
  }
]
Response

If the request was successful, the only parameter you are interested in is buffer. It contains the base64-encoded processed content and selected tags. After decoding we get:

[
{
"pubdate":"Fri, 15 May 2015 07:00:08 -0000",
"title":"Eurosceptic David Davis could oppose government on human rights reform",
"description":"Tory MP’s comments show growing backbench rebellion over plan that could lead to withdrawal from European court of human rights",
"media":"//i.guim.co.uk/img/static/sys-images/Guardian/Pix/pictures/2015/5/15/1431685122346/98b03955-6e86-4583-806d-49ed94ff343a-2060x1236.jpeg?w=1200&q=85&auto=format&sharp=10&s=02308b9514a25da2d9b3a04f2106ddbf",
"author":"Owen Bowcott",
"dc_date":"2015-05-15T06:00:08.000Z",
"link":"http://www.theguardian.com/law/2015/may/15/eurosceptic-david-davis-could-oppose-government-on-human-rights-reform",
"keywords":"European court of human rights,Human Rights Act,Human rights,UK bill of rights,Kenneth Clarke,Conservatives,Law,UK news,David Davis,Dominic Grieve,Politics",
"content_encoded":"The Conservative MP David Davis, a prominent Eurosceptic, has threatened to oppose government proposals that could lead to the UK withdrawing from the European court of human rights.\n\nDavis’s reported comments are a sign of growing rebellion on the Tory backbenches as the complexity and political difficulties involved in seceding from the judicial authority of the Strasbourg court become increasingly apparent to the government.\n\nThe former justice minister Ken Clarke and former attorney general Dominic Grieve QC – both re-elected to the Commons last week – have in the past warned about the danger of defying decisions handed down by ECHR judges on the grounds that it would undermine respect for the rule of law across Europe.\n\n\n\nDavis, the MP for Haltemprice and Howden, told his local paper, the Hull Daily Mail: “I’m afraid we will come into conflict with the European court and I don’t want us to leave it. If we leave, it’s an excuse for everyone else to leave. So I think that could be quite an interesting argument, come the day. I think it is more likely there will be an argument over that than over Europe.”\n\nLike Clarke and Grieve, Davis says he is in favour of reform but opposes unilateral withdrawal from the ECHR – one of the likely consequences of the party’s draft bill of rights. \n\n\n\nSeparately, in an open letter to the prime minister the International Bar Association’s Human Rights Institute points out that Tory proposals for a British bill of rights will limit the application of human rights laws to the “most serious cases” and exclude those “who do not fulfil their responsibilities in society”. Their intervention adds to a growing list of rights groups opposing the move.\n\nDavis, a former shadow home secretary, has in the past been a trenchant critic of the Human Rights Act. 
With a majority of only 12, David Cameron’s room for parliamentary manoeuvre could easily be restricted by a relatively small number of rebels.\n\nThe new team of ministers at the Ministry of Justice si considering its options and the draft bill of rights drawn up by Martin Howe QC and other legal advisers.\n\nThe 40-page document is understood to be on its seventh draft – a reflection of how much effort will be required to reconcile all the conflicting interests. A significant proportion of the text is understood to consist of sections of the European convention on human rights, which it is intended will be incorporated into UK law.\n\nBecause the draft bill was a Conservative party document and not a product of the coalition government, it could not be worked on by civil servants or official parliamentary draughtsmen before the election.\n\nThe British bill of rights, as it is known, would, according to the Tory manifesto, “break the formal link between British courts and the European court of human rights”. Judgments from Strasbourg would, in effect, become advisory and the UK’s supreme court will become supreme.\n\nThe new justice secretary, Michael Gove, who like his predecessor, Chris Grayling, is not a lawyer, will bring a fresh mind to the international consequences of defying Strasbourg’s judgments.\n\nThe newly appointed justice minister, Dominic Raab, who has responsibility for human rights and civil liberties, will be directly in charge of piloting the legislation through parliament. Lord Faulks, who has been reappointed as a justice minister and who also supports the proposals, will have the challenging task of propelling it through a House of Lords packed with Labour and Liberal Democrat opponents.\n\nThe Ministry of Justice said: “The government was elected with a manifesto commitment to replace the Human Rights Act with a British bill of rights. 
Ministers will be discussing their plans on this and making announcements in due course.”\n\nThe plans are expected to be mentioned in the Queen’s speech but a final bill may not be put before MPs until later this parliament.\n\nOne of the dilemmas ministers will have to consider is whether to incorporate the European convention into UK law or embark on an even more complex process of drawing up an entirely different set of rights.\n\nThe open letter to Cameron from the International Bar Association’s Human Rights Institute says: “We would like to remind you of the universality with which human rights law is enshrined in international agreements, to which the UK has always been a willing part.\n\nHuman rights can only be an effective mechanism for protection if they apply to all people in all cases.”\n\nSigned by the organisation’s co-chairs, Baroness Helena Kennedy QC and ambassador Hans Corell, a Swedish diplomat and former under-secretary general at the United Nations, the appeal reminds the prime minister that Europe has just celebrated the anniversary of VE Day the beginning of a reconciliation process that culminated in the establishment of the the European court of human rights in Strasbourg.\n\n“Human rights can only be an effective mechanism for protection if they apply to all people in all cases,” the open letter continues. “No differentiation can or should be made between ‘serious’ or ‘trivial’ cases and those who have or have not fulfilled particular requirements. To make such differentiations within any new bill of rights would be to remove the effectiveness of any human rights laws in protecting all people, including marginalised or vulnerable individuals. This would not be leadership, but its very antithesis.”",
"html_lang":"%html_lang%",
"pubdate_extractor":"ScrapyExtractor",
"title_extractor":"GooseExtractor",
"description_extractor":"ScrapyExtractor",
"media_extractor":"NewspaperExtractor",
"author_extractor":"ScrapyExtractor",
"dc_date_extractor":"ScrapyExtractor",
"link_extractor":"GooseExtractor",
"keywords_extractor":"GooseExtractor",
"content_encoded_extractor":"GooseExtractor",
"html_lang_extractor":"%html_lang_extractor%",
"crawler_time":"0.372",
"scraper_time":"0.948385000229",
"errors_mask":"0"
}
]
 Possible errors

The parameter that can lead to an error (if all systems on the requested server are working correctly) is json: anything from an invalid structure (validate it before making the request) to json contents that are not URL-encoded.

URL_CONTENT

Overview

This operation allows you to extract existing content of chosen URL from DB.

Base URL

http://demo.tagsreaper.com/TagsReaperUI/api

Methods

The HTTP GET and POST methods are supported. Possible URL parameters are: “json”, “cmd” and “apiToken”. The GET method requires url-encoded values.

URL request parameters

All parameters are required.

cmd: for URL_CONTENT API this parameter is ‘1’ (one)

apiToken: the token value for API requests. You can generate it from your Profile if you are already registered at admin.tagsreaper.com

json: generally for URL it looks like this:

[
    {
        "contentTypeMask": 253,
        "siteId": "8344182cbfebf030719adf5635a54a09",
        "url": "http://tr.dc4.hce-project.com/api-doc/",
        "urlFetch": null,
        "urlMd5": "b9f5d2c1bc03a8279721a6e606427d48"
    }
]

As you can see, this json is simpler than the one for BATCH. To start using it, just change ‘siteId’, ‘url’ and ‘urlMd5’ to the corresponding values of pages that have processed content in the DB.

Request example (after URL-encoding the json)

example

If you open it in browser – you will get the response:

{
    "errorCode": 0,
    "errorMessage": "",
    "itemsList": [
        {
            "errorCode": 0,
            "errorMessage": "",
            "host": "localhost",
            "id": 3137034544,
            "itemObject": [
                {
                    "contentURLMd5": "3d9e3b6a2142587566f694a1f54a0688",
                    "cookies": [],
                    "dbFields": [],
                    "headers": [],
                    "meta": [],
                    "processedContents": [
                        {
                            "buffer": "eyJkZWZhdWx0IjogeyJkYXRhIjogeyJtZXRyaWNzIjogeyJXb3Jkc01ldHJpYyI6IDY0fSwgInJlc0lkIjogIjZlNDg2YTkwNzk5ODNlMmY1Yzk5YzlmZDY0ZWZjZmNkIiwgInRhZ0xpc3QiOiBbW3sieHBhdGgiOiAiIiwgImV4dHJhY3RvciI6ICJHb29zZUV4dHJhY3RvciIsICJkYXRhIjoge30sICJuYW1lIjogImRjX2RhdGUifSwgeyJ4cGF0aCI6ICIvaHRtbC9AbGFuZyIsICJleHRyYWN0b3IiOiAiU2NyYXB5RXh0cmFjdG9yIiwgImRhdGEiOiBbImVuLVVTIl0sICJuYW1lIjogImh0bWxfbGFuZyJ9LCB7InhwYXRoIjogIiIsICJleHRyYWN0b3IiOiAiR29vc2VFeHRyYWN0b3IiLCAiZGF0YSI6IFsiSWYgeW91IG5lZWQgZGVkaWNhdGVkIGluc3RhbGxhdGlvbiBmb3IgZXh0ZW5zaXZlIHVzZSwgeW91IGNhbiBpbnN0YWxsIGFuZCBjb25maWd1cmUgVGFncyBSZWFwZXIgcGFja2FnZXMgb24geW91ciBvd24gc2VydmVyLlxuXG5UYWdzIFJlYXBlciBiYXNlZCBvbiB0aGUgZnJlZSwgb3Blbi1zb3VyY2UgSGllcmFyY2hpY2FsIENsdXN0ZXIgRW5naW5lIChIQ0UpLCBhbGwgb2YgaXRzIGNvbXBvbmVudHMgY2FuIGJlIGRvd25sb2FkZWQgYW5kIGluc3RhbGxlZCBvbiB5b3VyIHNlcnZlciBmcmVlIG9mIGNoYXJnZS5cblxuSXQgdGFrZXMgYWJvdXQgMzAtNjAgbWludXRlcyB0byBkZXBsb3kgeW91ciBkZWRpY2F0ZWQgVFIgaW5zdGFsbGF0aW9uIHRvIHN0YXJ0IGNvbGxlY3RpbmcgZGF0YSBmcm9tIHRoZSB3ZWIuIl0sICJuYW1lIjogImNvbnRlbnRfZW5jb2RlZCJ9LCB7InhwYXRoIjogIiIsICJleHRyYWN0b3IiOiAiR29vc2VFeHRyYWN0b3IiLCAiZGF0YSI6IFsiaHR0cDovL3RyLmRjNC5oY2UtcHJvamVjdC5jb20vc29mdHdhcmUvIl0sICJuYW1lIjogImxpbmsifSwgeyJ4cGF0aCI6ICIiLCAiZXh0cmFjdG9yIjogIkdvb3NlRXh0cmFjdG9yIiwgImRhdGEiOiBbIkRlZGljYXRlZCBJbnN0YWxsYXRpb24iXSwgIm5hbWUiOiAidGl0bGUifV1dfSwgImVycm9yX21lc3NhZ2UiOiAiIiwgImVycm9yX2NvZGUiOiAwLCAidGltZSI6ICIwLjM2NDY1ODgzMjU1In19",
                            "cDate": "2015-08-13 18:44:14",
                            "typeId": 10
                        }
                    ],
                    "rawContentMd5": "",
                    "rawContents": [],
                    "requests": [],
                    "siteId": "e16d48cf6e50e20720fd5cfc3fcf3a95",
                    "status": 7,
                    "url": "http://tr.dc4.hce-project.com/software/",
                    "urlMd5": "6e486a9079983e2f5c99c9fd64efcfcd"
                }
            ],
            "node": "m011_data",
            "port": "5530",
            "time": 140
        }
    ]
}
Response

If the request was successful, the only parameter you are interested in is buffer. It contains the base64-encoded processed content and selected tags. After decoding we get:

{
    "default": {
        "data": {
            "metrics": {
                "WordsMetric": 64
            },
            "resId": "6e486a9079983e2f5c99c9fd64efcfcd",
            "tagList": [
                [
                    {
                        "xpath": "",
                        "extractor": "GooseExtractor",
                        "data": {},
                        "name": "dc_date"
                    },
                    {
                        "xpath": "/html/@lang",
                        "extractor": "ScrapyExtractor",
                        "data": [
                            "en-US"
                        ],
                        "name": "html_lang"
                    },
                    {
                        "xpath": "",
                        "extractor": "GooseExtractor",
                        "data": [
                            "If you need dedicated installation for extensive use, you can install and configure Tags Reaper packages on your own server.\n\nTags Reaper based on the free, open-source Hierarchical Cluster Engine (HCE), all of its components can be downloaded and installed on your server free of charge.\n\nIt takes about 30-60 minutes to deploy your dedicated TR installation to start collecting data from the web."
                        ],
                        "name": "content_encoded"
                    },
                    {
                        "xpath": "",
                        "extractor": "GooseExtractor",
                        "data": [
                            "http://tr.dc4.hce-project.com/software/"
                        ],
                        "name": "link"
                    },
                    {
                        "xpath": "",
                        "extractor": "GooseExtractor",
                        "data": [
                            "Dedicated Installation"
                        ],
                        "name": "title"
                    }
                ]
            ]
        },
        "error_message": "",
        "error_code": 0,
        "time": "0.36465883255"
    }
}
 Possible errors

The parameter that can lead to an error (if all systems on the requested server are working correctly) is json: anything from an invalid structure (validate it before making the request) to json contents that are not URL-encoded. In other cases you will get a response from the server.

© 2015-2016 TagsReaper. All rights reserved.