In this notebook, we will assume that we’re running a company that sells a product (vacuum cleaners) and that we want to monitor our competitors by looking at the positive and negative reviews their products are receiving, as well as their latest product announcements.

Access the cookbook in Github

Generate an API Key

To get started, generate an API key in the Twin dashboard and substitute it for the value of the variable twin_key below.

import requests
import time

# API key sent in the "x-api-key" header of every request below; replace the
# placeholder with the key generated in the Twin dashboard.
twin_key = "<your api key>"
# Endpoint that new browsing tasks are POSTed to.
browse_endpoint = "https://api.twin.so/browse"
# Base URL used to poll a task; the task id is appended directly to it.
# Uses HTTPS like browse_endpoint so the API key header is never sent in
# plaintext.
task_endpoint = "https://api.twin.so/task/"

Define the competitors we want to monitor

# Brand names interpolated into each browsing goal; one task is launched per
# entry, in this order.
name_of_competitors = [
    "Dyson",
    "Shark",
    "LG",
    "Samsung",
]

We’ll define a helper function that waits until all tasks are completed. The /browse endpoint is asynchronous, so we need to poll /task/{taskId} until each task reports that it has finished.

def wait_for_tasks(task_ids, poll_interval=1):
    """Poll the task endpoint until every task has finished; return the outputs.

    While a task is still running, its most recent action is printed whenever
    it changes, so progress is visible.

    Args:
        task_ids: Iterable of task id strings to wait for.
        poll_interval: Seconds to sleep between polling rounds.

    Returns:
        A list with one output per entry of ``task_ids``, in the same order as
        ``task_ids`` (not in completion order), so callers can safely pair the
        outputs positionally with the inputs used to launch the tasks. A task
        whose final response has no "output" field contributes ``None``.

    Raises:
        requests.HTTPError: If the task endpoint answers with an error status.
    """
    task_ids = list(task_ids)
    outputs_by_task = {}
    latest_actions = {task_id: None for task_id in task_ids}
    remaining_tasks = set(task_ids)

    while remaining_tasks:
        # Iterate over a snapshot because finished tasks are removed in-loop.
        for task_id in list(remaining_tasks):
            # NOTE(review): "?limit=1" presumably restricts the returned steps
            # to the most recent one -- confirm against the API reference.
            response = requests.get(
                url=task_endpoint + task_id + "?limit=1",
                headers={"x-api-key": twin_key},
            )
            # Fail with a clear HTTP error instead of a KeyError on "status".
            response.raise_for_status()
            data = response.json()

            if data["status"] in {"COMPLETED", "FAILED"}:
                # Keyed by task id so the result order does not depend on
                # which task happens to finish first.
                outputs_by_task[task_id] = data.get("output")
                remaining_tasks.remove(task_id)
                continue

            steps = data.get("steps")
            action = steps[0].get("action") if steps else None
            if action and latest_actions[task_id] != action:
                latest_actions[task_id] = action
                print(f"Task {task_id}: {action}")

        # Only wait when there is still something left to poll.
        if remaining_tasks:
            time.sleep(poll_interval)

    return [outputs_by_task[task_id] for task_id in task_ids]

Amazon reviews

For the first step, we’ll use Amazon to find reviews of each competitor’s most popular vacuum cleaner.

def launch_amazon_task():
    """Start one Amazon review-gathering task per competitor.

    For every name in ``name_of_competitors`` a browsing task is POSTed to
    ``browse_endpoint`` with Amazon as the start URL.

    Returns:
        A list of task ids, one per competitor, in the same order as
        ``name_of_competitors``.

    Raises:
        requests.HTTPError: If the browse endpoint answers with an error
            status (instead of failing later with a KeyError on "taskId").
    """
    amazon_task_ids = []

    for competitor in name_of_competitors:
        print(f"Getting reviews for {competitor}")
        response = requests.post(
            url=browse_endpoint,
            headers={"x-api-key": twin_key},
            json={
                "goal": f"Find the latest reviews for {competitor}'s most popular vacuum cleaner - summarize the points made in the most positive and negative ones",
                "startUrl": "https://www.amazon.com/",
            },
        )
        # Surface rejected requests (bad key, quota, ...) as HTTP errors.
        response.raise_for_status()
        amazon_task_ids.append(response.json()["taskId"])
    return amazon_task_ids

# Launch the Amazon tasks, then block until every one of them has finished.
amazon_task_ids = launch_amazon_task()
amazon_reviews = wait_for_tasks(amazon_task_ids)

# Print each review summary under a header naming the competitor.
# NOTE(review): the pairing is purely positional, so it is only correct if
# wait_for_tasks returns outputs in the same order as amazon_task_ids -- confirm.
separator = "-" * 100
for competitor, review in zip(name_of_competitors, amazon_reviews):
    print(f"# Reviews for {competitor}")
    print(review)
    print(separator)
    print("\n\n")

Latest announcements

Now we’ll find each company’s latest product announcements using Google News.

def launch_latest_announcements_task():
    """Start one Google News product-announcement task per competitor.

    For every name in ``name_of_competitors`` a browsing task is POSTed to
    ``browse_endpoint`` with Google News as the start URL.

    Returns:
        A list of task ids, one per competitor, in the same order as
        ``name_of_competitors``.

    Raises:
        requests.HTTPError: If the browse endpoint answers with an error
            status (instead of failing later with a KeyError on "taskId").
    """
    latest_announcements_task_ids = []

    for competitor in name_of_competitors:
        print(f"Getting latest announcements for {competitor}")
        response = requests.post(
            url=browse_endpoint,
            headers={"x-api-key": twin_key},
            json={
                # The quoted search phrase is now closed; the original prompt
                # had an unbalanced single quote.
                "goal": f"Find the latest product announcements of {competitor}, by searching for '{competitor} product announcements'",
                "startUrl": "https://news.google.com/",
            },
        )
        # Surface rejected requests (bad key, quota, ...) as HTTP errors.
        response.raise_for_status()
        latest_announcements_task_ids.append(response.json()["taskId"])
    return latest_announcements_task_ids

# Launch the Google News tasks, then block until every one has finished.
latest_announcements_task_ids = launch_latest_announcements_task()
latest_announcements = wait_for_tasks(latest_announcements_task_ids)

# Print every announcement summary followed by blank lines as a visual gap.
# A single print with sep="\n" emits the same text as printing the
# announcement and then "\n\n" in two separate calls.
for announcement in latest_announcements:
    print(announcement, "\n\n", sep="\n")