Amazon BrowseNode Browser

I’m working on a project that requires a way to iterate through Amazon BrowseNodes. To do that, I wanted to do a breadth-first search through the tree, and I came up with a rather nice way to do it in Python.

There are a few resources that can be useful for finding BrowseNodes. The BrowseNodes.com website provides a crude way to browse through them, but it doesn’t offer the kind of control you might need in your own application.

Here’s a Python script that prints out BrowseNodes breadth-first, starting from the root node for books.

#!/usr/bin/env python
"""
Created by Matt Warren on 2009-09-08.
Copyright (c) 2009 HalOtis.com. All rights reserved.
"""
import time
import urllib
 
try:
    from xml.etree import ElementTree
except ImportError:
    from elementtree import ElementTree
 
from boto.connection import AWSQueryConnection
 
# Placeholder credentials sent with every BrowseNodeLookup request; replace
# them with real values before running the script.
AWS_ACCESS_KEY_ID = 'YOUR ID'  # used for the connection and as AWSAccessKeyId
AWS_SECRET_ACCESS_KEY = 'YOUR KEY'  # handed to the connection for signing
AWS_ASSOCIATE_TAG = 'YOUR TAG'  # sent as the AssociateTag parameter

# Registry mapping BrowseNode id -> name, filled in as nodes are discovered.
BROWSENODES = dict()
 
def bfs(root, children=iter):
    """Yield the nodes reachable from ``root`` in breadth-first order.

    ``children`` is called with a node and must return an iterable of that
    node's children.  It defaults to ``iter``, which treats every node as an
    iterable of its own children.  ``children(node)`` is only called once
    ``node`` has been yielded and the consumer asks for the next item, so a
    caller that stops iterating early triggers no further lookups.

    Each distinct node (compared with ``==``) is yielded at most once, even
    when it is reachable through several parents or through a cycle.
    """
    from collections import deque

    # Nodes are recorded when they are *queued*, not when they are yielded;
    # otherwise a child shared by two parents would be queued twice before
    # it is ever marked as visited.
    seen_hashable = set()
    seen_unhashable = []  # fallback so unhashable nodes (e.g. lists) still work

    def first_sighting(node):
        # Record ``node`` and report whether this is the first time we see it.
        try:
            if node in seen_hashable:
                return False
            seen_hashable.add(node)
        except TypeError:
            # Unhashable node: fall back to a linear equality search.
            if node in seen_unhashable:
                return False
            seen_unhashable.append(node)
        return True

    first_sighting(root)
    queue = deque([root])  # deque gives O(1) pops from the left end
    while queue:
        node = queue.popleft()
        yield node

        for child in children(node):
            if first_sighting(child):
                queue.append(child)
 
 
def amazon_browsenodelookup_children(nodeId, searchIndex='Books'):
    """Return the ids of the child BrowseNodes of ``nodeId``.

    Sends a signed ``BrowseNodeLookup`` GET request to ``/onca/xml`` on
    ``ecs.amazonaws.com`` and parses the XML response.  Every child found is
    also recorded in the module-level ``BROWSENODES`` dict as ``id -> name``.

    Args:
        nodeId: BrowseNode id to look up (sent as ``BrowseNodeId``).
        searchIndex: value sent as the ``SearchIndex`` request parameter.

    Returns:
        A list of child BrowseNode id strings, or ``[]`` when the response
        lacks any of the expected elements (for example a node that has no
        ``Children`` element).  Entries recorded in ``BROWSENODES`` before
        such a failure are left in place.
    """
    aws_conn = AWSQueryConnection(
        aws_access_key_id=AWS_ACCESS_KEY_ID,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY, is_secure=False,
        host='ecs.amazonaws.com')
    aws_conn.SignatureVersion = '2'
    params = dict(
        Service='AWSECommerceService',
        Version='2009-07-01',
        SignatureVersion=aws_conn.SignatureVersion,
        AWSAccessKeyId=AWS_ACCESS_KEY_ID,
        AssociateTag=AWS_ASSOCIATE_TAG,
        Operation='BrowseNodeLookup',
        SearchIndex=searchIndex,
        BrowseNodeId=nodeId,
        Timestamp=time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime()))
    verb = 'GET'
    path = '/onca/xml'
    qs, signature = aws_conn.get_signature(params, verb, path)
    qs = path + '?' + qs + '&Signature=' + urllib.quote(signature)
    response = aws_conn._mexe(verb, qs, None, headers={})
    content = response.read()
    tree = ElementTree.fromstring(content)
    # The root tag looks like '{namespace}Tag'; pull out the namespace so the
    # element lookups below can be qualified with it.
    NS = tree.tag.split('}')[0][1:]

    children = []
    try:
        for node in tree.find('{%s}BrowseNodes'%NS).find('{%s}BrowseNode'%NS).find('{%s}Children'%NS).findall('{%s}BrowseNode'%NS):
            name = node.find('{%s}Name'%NS).text
            child_id = node.find('{%s}BrowseNodeId'%NS).text
            children.append(child_id)
            BROWSENODES[child_id] = name
    except AttributeError:
        # find() returns None for a missing element, so the chained call on
        # it raises AttributeError.  Treat that as "no children" but let
        # every other exception (KeyboardInterrupt included) propagate.
        return []
    return children
 
 
if __name__ == '__main__':
    from itertools import islice

    # Seed the registry with the starting node so its name can be printed.
    BROWSENODES['1000'] = 'Books'
    LIMIT = 25
    # islice stops after LIMIT nodes without asking the generator for one
    # more, so no extra remote lookup is made just to discard its result.
    for node in islice(bfs('1000', amazon_browsenodelookup_children), LIMIT):
        # Single-argument print(...) emits the same text under Python 2 and
        # is also valid Python 3 syntax.
        print('%s - %s' % (BROWSENODES[node], node))