Sophie

Sophie

distrib > Mandriva > 2008.1 > x86_64 > by-pkgid > 2df1e95c77a205902f28a4e06c143abf > files > 10

beagle-0.3.3-7.3mdv2008.1.src.rpm

#!/usr/bin/env python
#vim:fileencoding=utf8

#   Index Web pages when you visit them using beagle
#   Copyright (C) 2007 Tao Fei     
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License along
#   with this program; if not, write to the Free Software Foundation, Inc.,
#   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import gtk
import epiphany
import md5
import os
import ConfigParser
import re
import string
import mimetypes
import gettext

# Fixed locations, both below the user's home directory ($HOME):
#   beagle_data_path - directory the content and metadata files are written to
#                      (kept with its trailing slash; file names are appended)
#   config_file_path - configuration file of this extension
beagle_data_path = "".join([os.environ["HOME"], "/.beagle/ToIndex/"])
config_file_path = "".join([os.environ["HOME"], "/.gnome2/epiphany/extensions/beagleIndexer.conf"])

#The following is about config
class Config(dict):
    '''Dictionary whose items can also be read and written as attributes.

    config.name is equivalent to config["name"], for lookup as well as for
    assignment.  Reading a name that is not stored raises AttributeError.
    '''
    def __getattr__(self, name):
        # Only reached when the normal attribute lookup fails.  A missing
        # item must surface as AttributeError (not KeyError) so that
        # getattr(obj, name, default) and hasattr() behave as usual.
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name, value):
        # Attribute assignment stores a dictionary item.
        self[name] = value

def load(fullpath, default=None):
    '''Load a config file as a Config object.

    The file is executed as Python source inside a fresh module namespace
    and that namespace becomes the configuration.

    fullpath -- path of the config file
    default  -- optional mapping of fallback values; an entry is used only
                when the file does not define that name

    Returns a Config.  If the file cannot be opened or read (IOError), the
    result holds just the default values.  Errors raised while executing
    the file (for example SyntaxError) are not caught here.
    '''
    if default is None:
        default = {}
    try:
        infile = open(fullpath)
        try:
            code = infile.read()
        finally:
            # close the handle explicitly instead of leaving it to the GC
            infile.close()
    except IOError:
        return Config(default)

    filename = os.path.basename(fullpath)

    # type(os) is the module type; the new module only serves as namespace.
    namespace = type(os)(filename).__dict__
    # SECURITY NOTE: the config file is run as arbitrary Python code.  Only
    # ever load files that the user controls.
    exec(compile(code, filename, 'exec'), namespace)
    for key, value in default.items():
        namespace.setdefault(key, value)
    return Config(namespace)

def save(fullpath,config):
    '''Save a Config object to a config file.

    Every entry is written as a "name=value" assignment in Python syntax,
    which is the form load() executes.

    fullpath -- path of the file to (over)write
    config   -- mapping of setting names to values

    Names starting with "_" are skipped; this covers the module bookkeeping
    entries that load() leaves in the mapping.  Booleans, integers and
    strings are written with repr(); lists are written one repr()'d item
    per line.  repr() keeps quotes and backslashes (common in the URL
    patterns) intact across a save/load cycle.  Any other value is
    replaced by the string "Not supported type".

    Nothing is returned.  If the file cannot be opened, a message is
    printed and the function returns without writing.
    '''
    try:
        outfile = open(fullpath, 'w')
    except IOError:
        print("beagle save config file to to %s error " % fullpath)
        return
    try:
        for key in config.keys():
            if key.startswith('_'):
                continue
            value = config[key]
            outfile.write(key + '=')
            # (str, type(u"")) covers str and unicode on Python 2
            if isinstance(value, (bool, int, str, type(u""))):
                outfile.write(repr(value) + '\n')
            elif isinstance(value, list):
                outfile.write('[\n')
                for li in value:
                    outfile.write(repr(li) + ',\n')
                outfile.write(']\n')
            else:
                outfile.write('"Not supported type"\n')
    finally:
        # close the file even when a write fails half way
        outfile.close()



#default config value
_ConfigDefault = {
    'auto_index':True,
    'prompt_keyword':False,
    'index_https':False,
    'default_index':True,
    'white_list_first':True,
    'white_list':[],
    'black_list':[],
}


# Load the configuration once at import time; settings missing from the
# file take their value from _ConfigDefault.
config = load(config_file_path,_ConfigDefault)

# i18n init: installs _() as a builtin.  locale_dir_path is an optional
# setting without a default; when it is absent, None is passed and gettext
# uses its default catalog directory.  Should installing from the configured
# directory fail, fall back to the default directory.
try:
    gettext.install('py_beagle_for_epiphany', config.get('locale_dir_path'))
except Exception:
    gettext.install('py_beagle_for_epiphany')

# UI definition that attach_window() merges into a window with
# ui_manager.add_ui_from_string():
#   - a "BeagleMenu" submenu inside the Tools menu holding the auto-index
#     and prompt-keyword toggles, "index this page" and "reload config";
#   - the auto-index toggle and "index this page" in the document popup;
#   - the same two entries plus "index link" in the link popup.
# The action="..." names are the ones defined in _actions and
# _toggle_actions; the menu paths are used to look the actions up again
# in _update_action() and init_ui().
_ui_str = """
<ui>
 <menubar name="menubar">
  <menu name="ToolsMenu" action="Tools">
   <separator/>
   <menu name="BeagleMenu" action="BeagleMenuAction">
     <menuitem name="PyBeagleExtAuto"
          action="PyBeagleExtAutoAction"/>
     <menuitem name="PyBeagleExtPromptKeyword"
          action="PyBeagleExtPromptKeywordAction"/>
     <menuitem name="PyBeagleExtIndexThisPage"
          action="PyBeagleExtIndexThisPageAction"/>
     <menuitem name="PyBeagleExtReloadConfig"
          action="PyBeagleExtReloadConfigAction"/>
   </menu>
   <separator/>
  </menu>
 </menubar>
 <popup name="EphyDocumentPopup" action="PopupAction">
   <menuitem action="PyBeagleExtAutoAction"/>
   <menuitem action="PyBeagleExtIndexThisPageAction"/>
 </popup>
 <popup name="EphyLinkPopup" action="PopupAction">
   <menuitem action="PyBeagleExtAutoAction"/>
   <menuitem action="PyBeagleExtIndexThisPageAction"/>
   <menuitem name="IndexLink" 
    action="PyBeagleExtIndexLinkAction"/>
 </popup>
</ui>
"""


# The menu entry has to reflect the active tab (window.get_active_tab()),
# which is not necessarily the tab that fired the signal.
def _update_action(window):
    '''Make "index this page" sensitive only when the active tab has loaded.'''
    action_path = '/menubar/ToolsMenu/BeagleMenu/PyBeagleExtIndexThisPage'
    action = window.get_ui_manager().get_action(action_path)
    active_tab = window.get_active_tab()
    # There is no tab yet when a window is first opened.
    if active_tab != None and active_tab.get_load_status() != True:
        page_loaded = True
    else:
        page_loaded = False
    action.set_sensitive(page_loaded)

def _switch_page_cb(notebook, page, page_num, window):
    '''Notebook "switch_page" handler.

    Refreshes the "index this page" action of the window; the notebook,
    page and page_num arguments are not used.
    '''
    _update_action(window)

def _index_this_page_cb(action, window):
    '''Handler of the "index this page" action.

    Indexes the page shown in the active tab on demand and reports it in
    the status bar.
    '''
    active_tab = window.get_active_tab()
    page = active_tab.get_embed()
    index_embed(active_tab, page, True)
    message = _("beagle is indexing %s") % page.get_location(True)
    set_status_label(window, message)

def _toggle_auto_cb(action,window):
    '''Handler of the auto-index toggle.

    Copies the toggle state into config.auto_index and writes the config
    file, but only when the state differs from the stored value.
    '''
    if config.auto_index == action.get_active():
        return
    config.auto_index = action.get_active()
    save(config_file_path, config)

def _toggle_prompt_keywords_cb(action,window):
    '''Handler of the prompt-for-keywords toggle.

    Copies the toggle state into config.prompt_keyword and, like the
    auto-index toggle, writes the config file so the choice is kept for
    later sessions.  Nothing happens when the state equals the stored
    value.
    '''
    if config.prompt_keyword != action.get_active():
        config.prompt_keyword = action.get_active()
        save(config_file_path, config)

def _index_link_cb(action,window):
    '''Handler of the "index link" action of the link popup.

    Takes the link from the context event of the window, indexes it and
    reports it in the status bar.  Does nothing without a context event.
    '''
    context_event = window.get_context_event()
    if context_event is not None:
        link = context_event.get_event_property("link")
        index_link(link)
        set_status_label(window,_("beagle is indexing link %s") %link)

def _load_status_cb(tab,event,window):
    '''
    Handler for changes of the load status of a tab.

    A load status that is not True means the page has finished loading;
    that is the moment the page gets indexed, provided auto indexing is
    switched on and should_index() accepts the URL.
    '''
    _update_action(window)
    if not config.auto_index:
        print("Auto Index is turned off. No index ")
        return
    # nothing to do until the page is loaded
    if not (tab != None and tab.get_load_status() != True):
        return
    page = tab.get_embed()
    location = page.get_location(True)
    if should_index(location) == False:
        print("%s will NOT be indexed." % location)
        return
    index_embed(tab, page, False)
    set_status_label(window, "beagle will index %s " % location)

def _reload_config_cb(action,window):
    '''Handler of the "reload config file" action.

    Reads the config file again, replaces the module-level config with the
    result and brings the toggle items of the window in line with it.
    '''
    # Without the global statement the assignment would only create a local
    # name and every other function would keep using the old settings.
    global config
    config = load(config_file_path,_ConfigDefault)
    init_ui(window)


# Entries for gtk.ActionGroup.add_actions().  Each tuple starts with the
# action name used in _ui_str; the third field is the label and the last
# one the callback.  (The remaining fields are presumably stock id,
# accelerator and tooltip - see the PyGTK reference.)
_actions = [
    ('BeagleMenuAction', None, 'Beagle', None, None, None),
    ('PyBeagleExtIndexThisPageAction', None,
     _('Index This Page'), None, None, _index_this_page_cb),
    ('PyBeagleExtIndexLinkAction', None,
     _('Index Link'), None, None, _index_link_cb),
    ('PyBeagleExtReloadConfigAction', None,
     _('Reload Config File'), None, None, _reload_config_cb),
]

# Entries for gtk.ActionGroup.add_toggle_actions(), same layout as above.
_toggle_actions = [
    ("PyBeagleExtAutoAction", None,
     _("Auto Index"), None, None, _toggle_auto_cb),
    ("PyBeagleExtPromptKeywordAction", None,
     _("Prompt key words when index on demand"), None, None,
     _toggle_prompt_keywords_cb),
]


def set_status_label(window,msg):
    '''Show msg in the status bar of window.

    The message is pushed under the "beagle" context after popping that
    context once, so a message shown earlier is replaced.
    '''
    bar = window.get_statusbar()
    beagle_context = bar.get_context_id("beagle")
    bar.pop(beagle_context)
    bar.push(beagle_context, msg)

def init_ui(window):
    '''Set the auto-index and prompt-keyword toggles from the config.'''
    toggles = (
        ('/menubar/ToolsMenu/BeagleMenu/PyBeagleExtAuto', 'auto_index'),
        ('/menubar/ToolsMenu/BeagleMenu/PyBeagleExtPromptKeyword', 'prompt_keyword'),
    )
    for action_path, setting in toggles:
        toggle = window.get_ui_manager().get_action(action_path)
        toggle.set_active(config[setting])

def prompt_for_keyword():
    '''Ask for extra keywords in a modal dialog.

    Returns the text that was entered when the dialog is accepted and an
    empty string otherwise.
    '''
    buttons = (gtk.STOCK_CANCEL, gtk.RESPONSE_REJECT,
               gtk.STOCK_OK, gtk.RESPONSE_ACCEPT)
    dialog = gtk.Dialog("Keywords", None, gtk.DIALOG_MODAL, buttons)
    dialog.set_default_response(gtk.RESPONSE_ACCEPT)

    caption = gtk.Label(_("Extra keywords to index"))
    text_entry = gtk.Entry()
    dialog.vbox.pack_start(caption)
    dialog.vbox.pack_start(text_entry)
    caption.show()
    text_entry.show()

    keywords = ""
    if dialog.run() == gtk.RESPONSE_ACCEPT:
        keywords = text_entry.get_text()
    dialog.destroy()
    return keywords

def should_index(url):
    '''Check whether the url should be indexed.

    The url is lower-cased and then judged as follows:
      * it starts with "https" and config.index_https is off -> no
      * it matches the black list as well as the white list
                                        -> config.white_list_first
      * it matches neither list         -> config.default_index
      * it matches exactly one list     -> yes for the white list,
                                           no for the black list
    A list matches when any of its regular expressions matches at the
    start of the url (re.match).
    '''
    url = url.lower()
    if not config.index_https and url.startswith("https"):
        return False
    in_blacklist = any(re.match(item, url) for item in config.black_list)
    in_whitelist = any(re.match(item, url) for item in config.white_list)
    if in_blacklist and in_whitelist:
        # conflict between the lists: white_list_first says which one wins
        return config.white_list_first
    if (not in_blacklist) and (not in_whitelist):
        # no rule applies: use the configured default
        return config.default_index
    return in_whitelist

def index_embed(tab,embed,ondemand=True):
    '''Queue the page shown in embed (which belongs to tab) for indexing.

    The page content is saved as "epiphany-<md5 of the url>" in
    beagle_data_path and its metadata as the same name with a leading dot.
    When ondemand is true and config.prompt_keyword is set, the user is
    asked for extra keywords, which are appended to the metadata.
    '''
    url = embed.get_location(True)
    print("beagle index embed " + url)
    digest = md5.new(url).hexdigest()
    content_path = beagle_data_path + "epiphany-" + digest
    meta_path = beagle_data_path + ".epiphany-" + digest
    write_content(embed, content_path)
    meta_lines = get_meta_from_embed(url, embed, tab)
    if ondemand and config.prompt_keyword:
        extra = prompt_for_keyword()
        if extra != "":
            meta_lines.append("t:dc:keyword:%s" % extra)
    write_raw_meta(meta_lines, meta_path)

def index_link(url, ondemand=True):
    '''Queue the file a link points to for indexing.

    The file is saved as "epiphany-<md5 of the url>" in beagle_data_path
    and its metadata as the same name with a leading dot.  When ondemand
    is true and config.prompt_keyword is set, the user is asked for extra
    keywords, which are appended to the metadata.
    '''
    digest = md5.new(url).hexdigest()
    content_path = beagle_data_path + "epiphany-" + digest
    meta_path = beagle_data_path + ".epiphany-" + digest
    write_file(url, content_path)
    meta_lines = get_meta_from_url(url)
    if ondemand and config.prompt_keyword:
        extra = prompt_for_keyword()
        if extra != "":
            meta_lines.append("t:dc:keyword:%s" % extra)
    write_raw_meta(meta_lines, meta_path)

def write_file(url,path):
    '''Save the file found at url to path through an EphyEmbedPersist.

    A line is printed when the persist object signals "completed".
    '''
    def report_completed(persist, saved_url):
        print("save completed for %s" % saved_url)

    saver = epiphany.ephy_embed_factory_new_object("EphyEmbedPersist")
    saver.set_flags(epiphany.EMBED_PERSIST_NO_VIEW)
    saver.set_source(url)
    saver.set_dest(path)
    saver.connect("completed", report_completed, url)
    saver.save()

def write_content(embed,path):
    '''Save the document shown in embed to path through an EphyEmbedPersist.'''
    flags = (epiphany.EMBED_PERSIST_NO_VIEW
             | epiphany.EMBED_PERSIST_COPY_PAGE
             | epiphany.EMBED_PERSIST_MAINDOC
             | epiphany.EMBED_PERSIST_FROM_CACHE)
    saver = epiphany.ephy_embed_factory_new_object("EphyEmbedPersist")
    saver.set_flags(flags)
    saver.set_embed(embed)
    saver.set_dest(path)
    saver.save()

def get_meta_from_url(url):
    '''Build the metadata lines for a url.

    Returns a list holding the url, the string "WebHistory" and the
    content type, which is only guessed from the url.
    '''
    meta_lines = [url, "WebHistory"]
    meta_lines.append(guess_content_type(url))
    return meta_lines

def get_meta_from_embed(url,embed,tab):
    '''Build the metadata lines for the page shown in embed.

    Returns a list holding the url, the string "WebHistory", the content
    type and a "k:_unindexed:encoding=" line with the encoding of embed.
    The content type comes from the document type of tab for HTML and XML
    documents and is guessed from the url otherwise.
    '''
    document_kind = tab.get_document_type()
    if document_kind == epiphany.EMBED_DOCUMENT_HTML:
        mime = "text/html"
    elif document_kind == epiphany.EMBED_DOCUMENT_XML:
        mime = "text/xml"
    else:
        mime = guess_content_type(url)
    encoding_line = "k:_unindexed:encoding=" + embed.get_encoding()
    return [url, "WebHistory", mime, encoding_line]

def write_raw_meta(metas,path):
    '''Write the metadata lines to path, one entry per line.

    metas -- iterable of strings; each one is written followed by a newline
    path  -- file to create or overwrite

    Errors from opening or writing the file are passed on to the caller;
    the file is closed in either case.
    '''
    meta_file = open(path,'w')
    try:
        for meta in metas:
            meta_file.write(meta + '\n')
    finally:
        meta_file.close()

def guess_content_type(url):
    '''Guess the content type of url with mimetypes.guess_type().

    Returns the guessed type, or an empty string when there is no guess.
    The guess is not reliable, but no API was found that reports the real
    content type.
    '''
    guessed = mimetypes.guess_type(url)[0]
    if guessed is not None:
        return guessed
    return ""

def check_env():
    '''Tell whether the beagle data directory (~/.beagle/ToIndex) exists.'''
    data_dir_present = os.path.isdir(beagle_data_path)
    return data_dir_present

#Implement epiphany extension interface

def attach_window(window):
    '''Extension hook: add the beagle actions and menus to a window.

    Nothing is set up when the beagle data directory is missing.  The
    action group and the merge id are stored on the window, and the
    handler id of the "switch_page" connection on its notebook, so that
    detach_window() can undo everything.
    '''
    if not check_env():
        print("Not Found Beagle")
        return
    manager = window.get_ui_manager()
    action_group = gtk.ActionGroup('PyBeagleExt')
    action_group.add_actions(_actions, window)
    action_group.add_toggle_actions(_toggle_actions, window)
    manager.insert_action_group(action_group, -1)
    merge_id = manager.add_ui_from_string(_ui_str)
    window._py_beagle_window_data = (action_group, merge_id)
    tabs = window.get_notebook()
    tabs._py_beagle_sig = tabs.connect('switch_page', _switch_page_cb, window)
    init_ui(window)

def detach_window(window):
    '''Extension hook: remove what attach_window() added to a window.

    Each part is only undone when its marker attribute is present, so the
    function is safe for windows that were never fully attached.
    '''
    tabs = window.get_notebook()
    if hasattr(tabs, "_py_beagle_sig"):
        handler_id = tabs._py_beagle_sig
        tabs.disconnect(handler_id)
        del tabs._py_beagle_sig
    if hasattr(window, "_py_beagle_window_data"):
        action_group, merge_id = window._py_beagle_window_data
        del window._py_beagle_window_data
        manager = window.get_ui_manager()
        manager.remove_ui(merge_id)
        manager.remove_action_group(action_group)
        manager.ensure_update()


def attach_tab(window,tab):
    '''Extension hook: watch the load status of a new tab.

    Nothing is connected when the beagle data directory is missing.  The
    handler id is stored on the tab as _python_load_status_sig.
    '''
    if not check_env():
        print("Not Found Beagle")
        return
    handler_id = tab.connect("notify::load-status", _load_status_cb, window)
    tab._python_load_status_sig = handler_id

def detach_tab(window,tab):
    '''Extension hook: stop watching the load status of a tab.

    Disconnects the handler that attach_tab() connected and removes the
    attribute holding its id.  Does nothing for a tab without such an
    attribute.
    '''
    # attach_tab() stores the handler id as _python_load_status_sig.  This
    # function used to look only for _python_beagle_load_status_sig, so the
    # handler was never disconnected; both spellings are handled now.
    for attr in ("_python_load_status_sig", "_python_beagle_load_status_sig"):
        if hasattr(tab, attr):
            tab.disconnect(getattr(tab, attr))
            delattr(tab, attr)