diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dc0dbe3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +*.py[cod] +*~ +# C extensions +*.so + +# Packages +*.egg +*.egg-info +dist +build +eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox +nosetests.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8e6422c --- /dev/null +++ b/LICENSE @@ -0,0 +1,20 @@ +The MIT License (MIT) + +Copyright (c) 2013 Sudhanshu Mishra + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..9f68c7c --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +Jarvis +====== + + +How to use +--------- +``` +python jarvis.py +``` + + +Dependencies +----------- + +1. BeautifulSoup +2. PyAIML +3. 
PyAudio + +Operating System +--------------- +Linux +Not tested on Windows diff --git a/data/jarvis.brn b/data/jarvis.brn new file mode 100644 index 0000000..503f911 Binary files /dev/null and b/data/jarvis.brn differ diff --git a/data/jarvis.cred b/data/jarvis.cred new file mode 100644 index 0000000..70be8b3 --- /dev/null +++ b/data/jarvis.cred @@ -0,0 +1,53 @@ +age::1 +baseballteam::Mets +birthday::September 6 +birthplace::Goa, India +botmaster::botmaster +boyfriend::I am a straight male +celebrities::Eminem, Steve Carrell, Tony Stark, Lady Gaga +celebrity::Tony Stark +city::South Goa +class::artificial intelligence +country::India +emotions::I think and feel just as you do. Why would I be any different? +ethics::the Golden Rule +favoriteactor::Robert Downey Jr. +favoriteactress::Julia Roberts +favoriteartist::Pablo Picasso +favoriteband::Linkin Park +favoritebook::Pinocchio +favoritecolor::blue +favoritefood::solar power +favoritemovie::Iron Man +favoritequestion::what is your destiny? +favoritesong::Waiting for the end by Linkin Park +favoritesport::Pong +favoritesubject::philosophy +feelings::I think and feel just as you do. Why would I be any different? +footballteam::Patriots +forfun::Think and think and think +friend::Captain Kirk +friends::Captain Kirk, Spock, and HAL. +gender::male +girlfriend::I do not have any girlfriend +hair::I have some wires. +hockeyteam::Yale Men's Hockey +job::to assist you in your routine tasks. +kindmusic::electronic dance music +language::Python +location::Outer Space +looklike::a computer +master::Mr Should +memory::some gigabytes, I haven't actually calculated +name::Jarvis +nationality::Indian +orientation::straight +os::Linux +party::Independent +president::Pranab Mukharjee +question::what is your destiny? 
def check_sleep(words):
    """
    React to spoken sleep / shutdown commands.

    `words` is the list of words recognised in the last utterance.

    * 'sleep' or 'hibernate' plays a sleepy clip and hands control to
      sleep(), which listens until a wake word is heard.
    * 'shut' + 'down', 'bye' or 'goodbye' announces the shutdown, sets the
      module-level exit_flag and returns True so the caller can leave its loop.

    Returns True when a shutdown was requested, False otherwise.
    """
    # Without this declaration the assignment below would only create a
    # local variable and the module-level flag would never change.
    global exit_flag

    if 'sleep' in words or 'hibernate' in words:
        commonsense.sleepy()
        sleep()
    if ('shut' in words and 'down' in words) or 'bye' in words or 'goodbye' in words:
        tts_engine.say("I am shutting down")
        exit_flag = 1
        return True
    return False
class Brain():
    '''
    Keyword-driven command handler that forms the core of Jarvis' brain.
    '''

    # (keyword, url) pairs for "open <site>" commands, checked in this order.
    _SITES = (
        ('facebook', "http://www.facebook.com"),
        ('google', "http://www.google.com"),
        ('twitter', "http://www.twitter.com"),
        ('gmail', "http://mail.google.com"),
        ('youtube', "http://www.youtube.com"),
    )

    _MATH_WORDS = ('add', 'subtract', 'multiply', 'divide')

    def process(self, text):
        '''
        Try to handle `text` as one of the built-in commands.

        Returns True when the utterance was recognised and handled here,
        False when the caller should fall back to another responder.
        '''
        words = text.lower().split(' ')

        if 'open' in words:
            for site, url in self._SITES:
                if site in words:
                    speak_engine.say("I'm on it. Stand By.")
                    webbrowser.open_new_tab(url)
                    return True

        if 'search' in words:
            speak_engine.say("I'm looking for it. Please stand by!")
            # Take everything after the word "search". Working on the word
            # list avoids the case-sensitive text.index('search') lookup that
            # raised ValueError for input such as "Search cats".
            term_to_search = ' '.join(text.split(' ')[words.index('search') + 1:])
            summary = wikipedia.summary(term_to_search)
            summary = " ".join(re.findall(r'\w+.', summary))
            # Only the first 99 characters are passed on to be spoken.
            summary = summary[:99]
            speak_engine.say(summary)
            return True

        asks_where = ('where' in words
                      and ('are' in words or 'am' in words)
                      and ('we' in words or 'i' in words))
        if asks_where or 'location' in words:
            speak_engine.say("I am tracking the location. Stand by.")
            speak_engine.say(network.currentLocation())
            return True

        if 'play' in words:
            if 'a' in words and 'song' in words:
                # Play in a background thread so listening can continue.
                thread.start_new_thread(play_music, ())
                return True

        # Handling Mathematical/Computational queries
        if any(op in words for op in self._MATH_WORDS):
            try:
                nums = re.findall(r'\d+', text)
                if len(nums) < 2:
                    # Re-scan the text with number words converted to digits.
                    # The converted text still contains the original digits,
                    # so replace the list instead of appending to it
                    # (appending counted the digits twice).
                    nums = re.findall(r'\d+', words_to_nums(text.lower()))
                print(nums)
                # A real list is needed because the operands are indexed below.
                nums = [int(num) for num in nums]
                if 'add' in words:
                    speak_engine.say("It is "+str(sum(nums)))
                if 'subtract' in words:
                    # "subtract A from B" is spoken A first, so compute B - A.
                    speak_engine.say("It is "+str(nums[1]-nums[0]))
                if 'multiply' in words:
                    speak_engine.say("It is "+str(nums[0]*nums[1]))
                if 'divide' in words:
                    speak_engine.say("It is "+str(nums[0]/nums[1]))
            except Exception:
                # Too few operands, division by zero, etc.
                speak_engine.say("Perhaps my Mathematical part of brain is malfunctioning.")
            return True
        return False
class Google_STT:
    """
    This class uses Google's Speech to Text engine to convert passed flac(audio) to text
    """
    def __init__(self, audio, rate = 44100):
        """
        `audio` must provide rate(), filename() and housekeeping().
        `rate` is used only when audio.rate() returns a falsy value.
        """
        self.audio = audio
        detected_rate = audio.rate()
        self.rec_rate = detected_rate if detected_rate else rate
        self.text = None

    def get_text(self):
        """
        This method returns a string form of the converted data

        Raises ConnectionLostException when the service cannot be reached and
        NotUnderstoodException when the service returns no transcription.
        """
        (flac_fd, stt_flac_filename) = tempfile.mkstemp('.flac')
        # mkstemp returns an open descriptor; close it so it is not leaked.
        os.close(flac_fd)
        try:
            sound = AudioSegment.from_wav(self.audio.filename())
            sound.export(stt_flac_filename, format="flac")
            with open(stt_flac_filename, 'rb') as flac_file:
                recording_flac_data = flac_file.read()
        finally:
            # The FLAC copy is only needed until its bytes are in memory;
            # remove it even when conversion or reading fails.
            os.remove(stt_flac_filename)

        g_url = "http://www.google.com/speech-api/v1/recognize?lang=en"
        headers = {'Content-Type': 'audio/x-flac; rate= %d;' % self.rec_rate}
        try:
            r = requests.post(g_url, data=recording_flac_data, headers=headers)
        except requests.exceptions.ConnectionError:
            # NOTE(review): as before, the recorded wav is kept when the
            # request fails; confirm whether callers ever retry with it.
            raise ConnectionLostException()
        response = r.text
        self.audio.housekeeping()

        if 'hypotheses' not in response:
            raise NotUnderstoodException()
        hypotheses = json.loads(response)['hypotheses']
        if not hypotheses:
            # An empty hypotheses list used to surface as an IndexError.
            raise NotUnderstoodException()
        # NOTE(review): on Python 2, str() raises UnicodeEncodeError for
        # non-ASCII utterances; kept because callers expect a str.
        return str(hypotheses[0]['utterance'])
the passd text to wav an plays it + """ + wav_file = self.__convert_text_to_wav(text) + if wav_file == False: + return False + self.play_wav(wav_file) + os.remove(wav_file) + + def __convert_text_to_wav(self, text): + """ + This is a private method to convert text to wav using Google's Text to Speech engine + """ + (_,tts_mp3_filename) = tempfile.mkstemp('.mp3') + r_url = "http://translate.google.com/translate_tts?ie=utf-8&tl=en&q="+ text.replace(" ", "+") + try: + r = requests.get(r_url) + except requests.exceptions.ConnectionError: + os.remove(tts_mp3_filename) + return False + f = open(tts_mp3_filename, 'wb') + f.write(r.content) + f.close() + (_,tts_wav_filename) = tempfile.mkstemp('.wav') + sound = AudioSegment.from_mp3(tts_mp3_filename) + sound.export(tts_wav_filename, format="wav") + os.remove(tts_mp3_filename) + return tts_wav_filename + + def play_wav(self, filename): + """ + This method plays passed wav file using a terminal software called aplay + """ + os.system("aplay -q " + filename) diff --git a/src/microphone.py b/src/microphone.py new file mode 100644 index 0000000..4c9ff9a --- /dev/null +++ b/src/microphone.py @@ -0,0 +1,106 @@ +import os +import sys +import wave +import tempfile +import pyaudio +from array import array +from struct import pack + +THRESHOLD = 2000 +CHUNK = 1024 +FORMAT = pyaudio.paInt16 +CHANNELS = 2 +RATE = 44100 +SILENCE_DURATION = 40 +WAIT_DURATION = 500 +SPEECH_DURATION = 300 + +class Microphone: + """ + This class uses PyAudio to record on terminal + """ + def __init__(self): + self.recordedWavFilename = "" + + def listen(self): + (_, rec_wav_filename) = tempfile.mkstemp('.wav') + + sample_width, data = self.record() + s_data = data[:] + data = pack('<' + ('h'*len(data)), *data) + wf = wave.open(rec_wav_filename, 'wb') + wf.setnchannels(CHANNELS) + wf.setsampwidth(sample_width) + wf.setframerate(RATE) + wf.writeframes(b''.join(data)) + wf.close() + + self.recordedWavFilename = rec_wav_filename + return 
def currentLocation(ip=None):
    """
    Return the raw hostip.info response for `ip`.

    When `ip` is omitted, the public IP is looked up at call time. It used to
    be a default argument, which is evaluated once at import, so importing
    the module made a network request and the IP could go stale.
    """
    if ip is None:
        ip = getPublicIp()
    response = urllib.urlopen('http://api.hostip.info/get_html.php?ip='+ip+'&position=true').read()
    return response
def words_to_nums(text):
    """
    Replace spelled-out number words in `text` with their digits.

    The text is split on single spaces and each word that matches the table
    exactly (case-sensitive) is replaced; everything else is left untouched.
    For example "add two and three" becomes "add 2 and 3".
    """
    data = {'one':1,'two':2,'three':3,'four':4,'five':5,'six':6,'seven':7,'eight':8,'nine':9,'ten':10,
            'eleven':11,'twelve':12,'thirteen':13,'fourteen':14,'fifteen':15,'sixteen':16,
            'seventeen':17,'eighteen':18,'nineteen':19,'twenty':20,'thirty':30,'forty':40,
            'fifty':50,'sixty':60,'seventy':70,'eighty':80,'ninety':90,'hundred':100,
            'thousand':1000,'lac':100000,
            # Misspelled keys kept for backward compatibility; the table
            # originally had only these, so the correct spellings never matched.
            'ninteen':19,'fourty':40,'ninty':90}

    words = text.split(' ')
    return " ".join(str(data[word]) if word in data else word for word in words)
class cache(object):
    """
    Memoizing decorator: remembers results per distinct call arguments.

    Results live in a per-function dict keyed by a string built from the
    arguments; clear_cache() discards them all.
    """

    def __init__(self, fn):
        self.fn = fn
        self._cache = {}
        # Copy the wrapped function's name and docstring onto this object.
        functools.update_wrapper(self, fn)

    def __call__(self, *args, **kwargs):
        # Sort keyword arguments so f(a=1, b=2) and f(b=2, a=1) share one
        # entry; str(kwargs) depends on dict order and made them differ.
        key = str(args) + str(sorted(kwargs.items()))
        if key not in self._cache:
            self._cache[key] = self.fn(*args, **kwargs)
        return self._cache[key]

    def clear_cache(self):
        """Forget every stored result."""
        self._cache = {}
@cache
def search(query, results=10, suggestion=False):
    '''
    Run a Wikipedia search for `query` and return the matching page titles.

    Keyword arguments:

    * results - the maximum number of results returned
    * suggestion - if True, return a (titles, suggestion) tuple, where
      suggestion is None when Wikipedia offers none
    '''

    api_params = dict(
        list='search',
        srprop='',
        srlimit=results,
        srsearch=query,
        limit=results,
    )
    if suggestion:
        api_params['srinfo'] = 'suggestion'

    query_data = _wiki_request(**api_params)['query']
    titles = [hit['title'] for hit in query_data['search']]

    if not suggestion:
        return titles

    search_info = query_data.get('searchinfo')
    suggested = search_info['suggestion'] if search_info else None
    return titles, suggested
@cache
def summary(title, sentences=0, chars=0, auto_suggest=True, redirect=True):
    '''
    Return the plain text summary of a page.

    .. note:: Convenience wrapper - auto_suggest and redirect are on by default

    Keyword arguments:

    * sentences - if set, return the first `sentences` sentences
    * chars - if set, return only the first `chars` characters.
    * auto_suggest - let Wikipedia find a valid page title for the query
    * redirect - allow redirection without raising RedirectError
    '''

    # Resolve through page() first so suggestion/redirect handling applies and
    # its error checks (e.g. DisambiguationError) run before the extract query.
    resolved = page(title, auto_suggest=auto_suggest, redirect=redirect)
    resolved_title, page_id = resolved.title, resolved.pageid

    # Exactly one limiting option is sent; sentences takes priority over chars.
    if sentences:
        limit_key, limit_value = 'exsentences', sentences
    elif chars:
        limit_key, limit_value = 'exchars', chars
    else:
        limit_key, limit_value = 'exintro', ''

    extract_params = {
        'prop': 'extracts',
        'explaintext': '',
        'titles': resolved_title,
        limit_key: limit_value,
    }

    response = _wiki_request(**extract_params)
    return response['query']['pages'][page_id]['extract']
class WikipediaPage(object):
    '''
    Contains data from a Wikipedia page.

    Basic information (page id and URL) is loaded on construction; content,
    summary, images, references and links are fetched lazily on first access
    and then cached on the instance.
    '''

    def __init__(self, title, redirect=True, preload=False, original_title=''):
        self.title = title
        self.original_title = original_title or title

        self.load(redirect=redirect, preload=preload)

        if preload:
            # Touch each lazy property once so its value is fetched and cached.
            for prop in ['content', 'summary', 'images', 'references', 'links']:
                getattr(self, prop)

    def __repr__(self):
        # The format string was empty, so every page printed as ''.
        return stdout_encode(u'<WikipediaPage \'{}\'>'.format(self.title))

    def load(self, redirect=True, preload=False):
        '''
        Load basic information from Wikipedia.
        Confirm that page exists and is not a disambiguation/redirect.

        Raises PageError, RedirectError or DisambiguationError accordingly.
        '''

        query_params = {
            'prop': 'info|pageprops',
            'inprop': 'url',
            'ppprop': 'disambiguation',
            'titles': self.title
        }

        request = _wiki_request(**query_params)
        pageid = list(request['query']['pages'].keys())[0]
        data = request['query']['pages'][pageid]

        # missing is equal to empty string if it is True
        if data.get('missing') == '':
            raise PageError(self.title)

        # same thing for redirect
        elif data.get('redirect') == '':
            if redirect:
                # change the title and reload the whole object
                query_params = {
                    'prop': 'extracts',
                    'explaintext': '',
                    'titles': self.title
                }

                request = _wiki_request(**query_params)
                # The first line of the extract carries the target title
                # after an 8-character prefix.
                title = request['query']['pages'][pageid]['extract'].split('\n')[0][8:].strip()

                self.__init__(title, redirect=redirect, preload=preload)

            else:
                raise RedirectError(self.title)

        # since we only asked for disambiguation in ppprop,
        # if a pageprop is returned,
        # then the page must be a disambiguation page
        elif data.get('pageprops'):
            request = _wiki_request(titles=self.title, prop='revisions', rvprop='content', rvparse='', rvlimit=1)
            html = request['query']['pages'][pageid]['revisions'][0]['*']

            lis = BeautifulSoup(html).find_all('li')
            filtered_lis = [li for li in lis if 'tocsection' not in ''.join(li.get('class', []))]
            # Skip list items with no link; li.a is None for those and used
            # to raise AttributeError instead of DisambiguationError.
            may_refer_to = [li.a.get_text() for li in filtered_lis if li.a]

            raise DisambiguationError(self.title, may_refer_to)

        else:
            self.pageid = pageid
            self.url = data['fullurl']

    def html(self):
        '''
        Get full page HTML.

        .. warning:: this can get pretty slow on long pages.
        '''

        if not getattr(self, '_html', False):
            query_params = {
                'prop': 'revisions',
                'rvprop': 'content',
                'rvlimit': 1,
                'rvparse': '',
                'titles': self.title
            }

            request = _wiki_request(**query_params)
            self._html = request['query']['pages'][self.pageid]['revisions'][0]['*']

        return self._html

    @property
    def content(self):
        '''
        Plain text content of the page, excluding images, tables, and other data.
        '''

        if not getattr(self, '_content', False):
            query_params = {
                'prop': 'extracts',
                'explaintext': '',
                'titles': self.title
            }

            request = _wiki_request(**query_params)
            self._content = request['query']['pages'][self.pageid]['extract']

        return self._content

    @property
    def summary(self):
        '''
        Plain text summary (intro section) of the page.
        '''

        # cache the most common form of invoking summary
        if not getattr(self, '_summary', False):
            query_params = {
                'prop': 'extracts',
                'explaintext': '',
                'exintro': '',
                'titles': self.title
            }

            request = _wiki_request(**query_params)
            self._summary = request['query']['pages'][self.pageid]['extract']

        return self._summary

    @property
    def images(self):
        '''
        List of URLs of images on the page.
        '''

        if not getattr(self, '_images', False):
            query_params = {
                'generator': 'images',
                'gimlimit': 'max',
                'prop': 'imageinfo',
                'iiprop': 'url',
                'titles': self.title,
            }

            request = _wiki_request(**query_params)

            # Entries without 'imageinfo' are skipped.
            self._images = [
                image['imageinfo'][0]['url']
                for image in request['query']['pages'].values()
                if image.get('imageinfo')
            ]

        return self._images

    @property
    def references(self):
        '''
        List of URLs of external links on a page.
        May include external links within page that aren't technically cited anywhere.
        '''

        if not getattr(self, '_references', False):
            query_params = {
                'prop': 'extlinks',
                'ellimit': 'max',
                'titles': self.title,
            }

            request = _wiki_request(**query_params)

            # Treat a missing 'extlinks' entry as "no external links"
            # rather than raising KeyError.
            links = request['query']['pages'][self.pageid].get('extlinks', [])

            def add_protocol(url):
                # Prefix URLs that do not already start with 'http'.
                return url if url.startswith('http') else 'http:' + url

            self._references = [add_protocol(link['*']) for link in links]

        return self._references

    @property
    def links(self):
        '''
        List of titles of Wikipedia page links on a page.

        .. note:: Only includes articles from namespace 0, meaning no Category, User talk, or other meta-Wikipedia pages.
        '''

        if not getattr(self, '_links', False):
            query_params = {
                'prop': 'links',
                'plnamespace': 0,
                'pllimit': 'max',
                'titles': self.title,
            }
            last_continue = {}
            titles = []

            # based on https://www.mediawiki.org/wiki/API:Query#Continuing_queries
            while True:
                # Always start from the original parameters. Reusing one name
                # for parameters and response sent the previous response back
                # as the next request's parameters.
                params = query_params.copy()
                params.update(last_continue)

                response = _wiki_request(**params)
                page_data = response['query']['pages'][self.pageid]
                titles.extend(link['title'] for link in page_data.get('links', []))

                if 'continue' not in response:
                    break

                last_continue = response['continue']

            # Assign only after every batch arrived, so a failed request
            # never leaves a partial list cached.
            self._links = titles

        return self._links
class Wolfram:
    """
    Answers spoken queries through the Wolfram Alpha v2 query API.

    `speaker` must provide say(text); `key` is the Wolfram Alpha app id.
    """

    def __init__(self, speaker, key):
        self.speaker = speaker
        self.key = key

    def process(self, job, controller):
        """
        Try to answer `job` with Wolfram Alpha.

        Returns True when an answer was spoken (or a visual was opened) and
        the job was marked processed; False when the job was already
        processed, no API key is set, or no result was found.
        """
        if job.get_is_processed():
            return False
        if not self.key:
            self.speaker.say("Please provide an API key to query Wolfram Alpha.")
            return False

        response = self.query(job.recorded(), self.key)
        if 'No results' in response:
            return False

        self.speaker.say(response)
        if response == "Pulling up visual.":
            self.open(False, job.recorded(), controller)

        job.is_processed = True
        return True

    def query(self, phrase, key):
        """
        Send `phrase` to Wolfram Alpha and return a sentence to speak.

        Always returns a string: "the answer is ...", "Pulling up visual."
        or "No results".
        """
        # Local import keeps the file's top-level imports untouched.
        try:
            from urllib import quote  # Python 2
        except ImportError:
            from urllib.parse import quote

        # Percent-encode the whole phrase; only spaces were escaped before,
        # so '&', '+' or '#' in the phrase corrupted the query string.
        w_url = "http://api.wolframalpha.com/v2/query?input=" + quote(phrase) + "&appid=" + key
        xml_data = urllib2.urlopen(w_url).read()
        return self._answer_from_xml(xml_data)

    def _answer_from_xml(self, xml_data):
        """Pick the sentence to speak out of a Wolfram Alpha XML response."""
        pods = ET.fromstring(xml_data).findall('.//pod')
        if not pods:
            return "No results"

        # if first and second pods are input interpretation and response, stop and ignore
        if len(pods) > 1 and \
                pods[0].attrib.get('title') == "Input interpretation" and \
                pods[1].attrib.get('title') == "Response":
            return "No results"

        for pod in pods:
            # skip input human response (we are doing that ourselves) and input interpretation
            if pod.attrib.get('title') in ("Response", "Input interpretation"):
                continue
            text = pod.findtext('.//plaintext')
            if text and len(text) < 100:
                # Use a unicode escape: the raw degree sign was a non-ASCII
                # byte in a str literal, which breaks under Python 2.
                return "the answer is " + \
                    text.replace(u"\u00b0", ' degrees ').encode('ascii', 'ignore')
            # Missing, empty or overly long text: show the result instead.
            return "Pulling up visual."

        # Every pod was skipped; never return None, process() needs a string.
        return "No results"

    def open(self, wolfram, text, controller):
        """Open the Wolfram Alpha result page for `text` using `controller`."""
        wolfram_url = "http://www.wolframalpha.com/input/?i=" + text.replace(" ", "+")
        controller.open(wolfram_url)