translate url with google translate from python script
I'm trying to use google translate from a python script:
#!/usr/bin/env python
from urllib2 import urlopen
from urllib import urlencode
base_url = "http://www.google.com/translate?"
params = (('langpair','en|es'), ('u','http://asdf.com'),)
url = base_url+urlencode(params)
print "Encoded URL: %s" % url
print urlopen(url).read()
I'm getting the error 403 when I use it.
# ./1.py
Encoded URL: http://www.google.com/translate?langpair=en%7Ces&u=http%3A%2F%2Fasdf.com
Traceback (most recent call last):
...
urllib2.HTTPError: HTTP Error 403: Forbidden
However the same URL works fine when accessed from browser. Could anyone spot the error? Or is it that google does not allow this type of usage?
Thanks in advance
If Google doesn't let you do this, you could programmatically translate the normal website's source via Google's APIs.
I wrote a function for this a little while back:
def translate(text, src='', to='en'):
    """Translate *text* with the (deprecated) Google AJAX language API.

    The API limits request size, so long inputs are sent in
    4500-character chunks and the translated pieces are concatenated.

    Parameters:
        text: the string to translate.
        src:  source language code ('' lets Google auto-detect).
        to:   target language code.

    Returns the translated text; chunks whose response carries no
    translation are skipped (best effort).
    """
    parameters = {'langpair': '{0}|{1}'.format(src, to), 'v': '1.0'}
    translated = ''
    # NOTE: the original looped with `for text in (text[index:...] ...)`,
    # rebinding `text`; the generator then sliced the *previous chunk*
    # instead of the full input for every chunk after the first.  Using a
    # distinct loop name fixes that.
    for chunk in (text[index:index + 4500] for index in range(0, len(text), 4500)):
        parameters['q'] = chunk
        response = json.loads(
            urllib.request.urlopen(
                'http://ajax.googleapis.com/ajax/services/language/translate',
                data=urllib.parse.urlencode(parameters).encode('utf-8'),
            ).read().decode('utf-8')
        )
        try:
            translated += response['responseData']['translatedText']
        except (KeyError, TypeError):
            # Response carried no translation (API error) — skip this chunk.
            pass
    return translated
You should be using the google API. I found and tested this code, it works:
#!/usr/bin/env python
from urllib2 import urlopen
from urllib import urlencode
import sys
lang1=sys.argv[1]
lang2=sys.argv[2]
langpair='%s|%s'%(lang1,lang2)
text=' '.join(sys.argv[3:])
base_url='http://ajax.googleapis.com/ajax/services/language/translate?'
params=urlencode( (('v',1.0),
('q',text),
('langpair',langpair),) )
url=base_url+params
content=urlopen(url).read()
start_idx=content.find('"translatedText":"')+18
translation=content[start_idx:]
end_idx=translation.find('"}, "')
translation=translation[:end_idx]
print translation
source
You want to use the official Google Translate APIs:
http://code.google.com/intl/de-DE/apis/language/translate/overview.html
Apart from that:
http://www.catonmat.net/blog/python-library-for-google-search/
Your problem is that your request has no headers (which tell Google what your browser is and what it is compatible with).
I had this error before, when I made my Google Translate API.
you can find it here: https://github.com/mouuff/Google-Translate-API
You can use much better Python code for translating with Google:
SOURCE: https://neculaifantanaru.com/en/python-code-text-google-translate-website-translation-beautifulsoup-new.html
from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
import requests
import sys
import os
class UnsortedAttributes(HTMLFormatter):
    """Output formatter that emits tag attributes in their source order
    instead of BeautifulSoup's default alphabetical sorting."""

    def attributes(self, tag):
        # Pass the (name, value) pairs through exactly as stored.
        yield from tag.attrs.items()
# --- Configuration ---
files_from_folder = r"c:\Folder2"  # folder containing the HTML files to translate
use_translate_folder = True  # write output into a 'translated' subfolder instead of alongside the sources
destination_language = 'vi'  # here you change the language you want to translate into
extension_file = ".html"  # only files with this extension are processed
directory = os.fsencode(files_from_folder)  # bytes path, as expected by os.listdir below
def translate(text, target_language):
    """Translate *text* into *target_language* via the unofficial
    translate.google.com mobile endpoint.

    The request mimics the Google Translate iOS app (the endpoint
    rejects clients it does not recognize).  The JSON response's
    translated sentence fragments are joined into one string.

    Raises on network failure, and KeyError if the response lacks the
    expected "sentences" field.
    """
    url = "https://translate.google.com/translate_a/single"
    # Headers copied from the Google Translate iOS app; a bare request
    # without them is refused.
    headers = {
        "Host": "translate.google.com",
        "Accept": "*/*",
        "Cookie": "",
        "User-Agent": "GoogleTranslate/5.9.59004 (iPhone; iOS 10.2; ja; iPhone9,1)",
        "Accept-Language": "fr",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
    }
    params = {
        "client": "it",
        "dt": ["t", "rmt", "bd", "rms", "qca", "ss", "md", "ld", "ex"],
        "otf": "2",
        "dj": "1",
        "q": text,
        "hl": "ja",
        "ie": "UTF-8",
        "oe": "UTF-8",
        "sl": "en",
        "tl": target_language,
    }
    res = requests.get(url=url, headers=headers, params=params).json()
    # Concatenate every translated sentence fragment into one paragraph.
    return ''.join(sentence["trans"] for sentence in res["sentences"])
def recursively_translate(node):
    """Translate every non-empty text child of *node* in place,
    recursing into nested tags.

    Translation failures are deliberately swallowed so one bad string
    does not abort the whole document (best effort).
    """
    # Index loop on purpose: replaceWith() mutates node.contents in
    # place (same length), so we re-read each slot by index.
    for x in range(len(node.contents)):
        child = node.contents[x]
        if isinstance(child, str):
            if child.strip() != '':
                try:
                    child.replaceWith(translate(text=child, target_language=destination_language))
                except Exception:
                    # Best effort: keep the original text on any failure.
                    pass
        elif child is not None:
            recursively_translate(child)
# (tag name, attrs filter, CSS class) of every element that should be
# translated when it sits between the ARTICOL START/FINAL markers.
ARTICLE_SELECTORS = [
    ('h1', {'itemprop': 'name'}, 'den_articol'),
    ('p', {}, 'text_obisnuit'),
    ('p', {}, 'text_obisnuit2'),
    ('span', {}, 'text_obisnuit2'),
    ('li', {}, 'text_obisnuit'),
    ('a', {}, 'linkMare'),
    ('h4', {}, 'text_obisnuit2'),
    ('h5', {}, 'text_obisnuit2'),
    ('h1', {'itemprop': 'name'}, 'den_webinar'),
]

# For each HTML file in the source folder: translate the title, the meta
# description and the article body elements, then write the result to
# '<name>_<destination_language>.html'.
for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if filename == 'y_key_e479323ce281e459.html' or filename == 'directory.html':  # ignore these 2 files
        continue
    if not filename.endswith(extension_file):
        continue
    with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
        # Wrap the document in <pre> so html.parser keeps it verbatim;
        # the wrapper is stripped again before writing (output[5:-6]).
        soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')
    for title in soup.findAll('title'):
        recursively_translate(title)
    for meta in soup.findAll('meta', {'name': 'description'}):
        try:
            meta['content'] = translate(text=meta['content'], target_language=destination_language)
        except Exception:
            pass  # best effort: keep the original description
    for tag_name, attrs, css_class in ARTICLE_SELECTORS:
        for element in soup.findAll(tag_name, attrs, class_=css_class):
            # Recompute the marker positions for every element:
            # recursively_translate mutates the soup, so earlier
            # offsets go stale after each translation.
            document = str(soup)
            begin_comment = document.index('<!-- ARTICOL START -->')
            end_comment = document.index('<!-- ARTICOL FINAL -->')
            if begin_comment < document.index(str(element)) < end_comment:
                recursively_translate(element)
    print(f'{filename} translated')
    output = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
    new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
    if use_translate_folder:
        # makedirs(exist_ok=True) replaces the original try/mkdir/retry.
        translated_dir = files_from_folder + r'\translated'
        os.makedirs(translated_dir, exist_ok=True)
        out_path = os.path.join(translated_dir, new_filename)
    else:
        out_path = os.path.join(files_from_folder, new_filename)
    with open(out_path, 'w', encoding='utf-8') as new_html:
        # Strip the '<pre>' / '</pre>' wrapper added when parsing.
        new_html.write(output[5:-6])
Comments