开发者

translate url with google translate from python script

I'm trying to use google translate from a python script:

#!/usr/bin/env python
from urllib2 import urlopen
from urllib import urlencode

base_url = "http://www.google.com/translate?"
params = (('langpair','en|es'), ('u','http://asdf.com'),)
url = base_url+urlencode(params)
print "Encoded URL: %s" % url 
print urlopen(url).read()

I'm getting the error 403 when I use it.

# ./1.py 
Encoded URL: http://www.google.com/translate?langpair=en%7Ces&u=http%3A%2F%2Fasdf.com
Traceback (most recent call last):
开发者_如何转开发...
urllib2.HTTPError: HTTP Error 403: Forbidden

However the same URL works fine when accessed from browser. Could anyone spot the error? Or is it that google does not allow this type of usage?

Thanks in advance


If Google doesn't let you do this, you could programatically translate the normal website's source via the Google's APIs.

I wrote a function for this a little while back:

def translate(text, src = '', to = 'en'):
  parameters = ({'langpair': '{0}|{1}'.format(src, to), 'v': '1.0' })
  translated = ''

  for text in (text[index:index + 4500] for index in range(0, len(text), 4500)):
    parameters['q'] = text
    response = json.loads(urllib.request.urlopen('http://ajax.googleapis.com/ajax/services/language/translate', data = urllib.parse.urlencode(parameters).encode('utf-8')).read().decode('utf-8'))

    try:
      translated += response['responseData']['translatedText']
    except:
      pass

  return translated


You should be using the google API. I found and tested this code, it works:

#!/usr/bin/env python
from urllib2 import urlopen
from urllib import urlencode
import sys

lang1=sys.argv[1] lang2=sys.argv[2] langpair='%s|%s'%(lang1,lang2) text=' '.join(sys.argv[3:]) base_url='http://ajax.googleapis.com/ajax/services/language/translate?' params=urlencode( (('v',1.0), ('q',text), ('langpair',langpair),) ) url=base_url+params content=urlopen(url).read() start_idx=content.find('"translatedText":"')+18 translation=content[start_idx:] end_idx=translation.find('"}, "') translation=translation[:end_idx] print translation

source


You want to use the offical Google Translate APIs:

http://code.google.com/intl/de-DE/apis/language/translate/overview.html

Apart from that:

http://www.catonmat.net/blog/python-library-for-google-search/


your problem is because you have no headers

(which tells google what is your browser and compatibility)

I had this error before when I made my google translate api

you can find it here: https://github.com/mouuff/Google-Translate-API


you can use a much better python code for translating with google:

SOURCE: https://neculaifantanaru.com/en/python-code-text-google-translate-website-translation-beautifulsoup-new.html

from bs4 import BeautifulSoup
from bs4.formatter import HTMLFormatter
import requests
import sys
import os

class UnsortedAttributes(HTMLFormatter):
    def attributes(self, tag):
        for k, v in tag.attrs.items():
            yield k, v

files_from_folder = r"c:\Folder2"
use_translate_folder = True
destination_language = 'vi'  #aici schimbi limba in care vrei sa traduci
extension_file = ".html"
directory = os.fsencode(files_from_folder)

def translate(text, target_language):
    url = "https://translate.google.com/translate_a/single"
    headers = {
        "Host": "translate.google.com",
        "Accept": "*/*",
        "Cookie": "",
        "User-Agent": "GoogleTranslate/5.9.59004 (iPhone; iOS 10.2; ja; iPhone9,1)",
        "Accept-Language": "fr",
        "Accept-Encoding": "gzip, deflate",
        "Connection": "keep-alive",
        }
    sentence = text
    params = {
        "client": "it",
        "dt": ["t", "rmt", "bd", "rms", "qca", "ss", "md", "ld", "ex"],
        "otf": "2",
        "dj": "1",
        "q": sentence,
        "hl": "ja",
        "ie": "UTF-8",
        "oe": "UTF-8",
        "sl": "en",
        "tl": target_language,
        }

    res = requests.get(
        url=url,
        headers=headers,
        params=params,
        )

    res = res.json()

    paragraph = ''
    for i in range(0, len(res["sentences"])):
        paragraph += res["sentences"][i]["trans"]

    return paragraph

def recursively_translate(node):
    for x in range(len(node.contents)):
        if isinstance(node.contents[x], str):
            if node.contents[x].strip() != '':
                try:
                    node.contents[x].replaceWith(translate(text=node.contents[x], target_language=destination_language))
                except:
                    pass
        elif node.contents[x] != None:
            recursively_translate(node.contents[x])

for file in os.listdir(directory):
    filename = os.fsdecode(file)
    print(filename)
    if filename == 'y_key_e479323ce281e459.html' or filename == 'directory.html': #ignore this 2 files
        continue
    if filename.endswith(extension_file):
        with open(os.path.join(files_from_folder, filename), encoding='utf-8') as html:
            soup = BeautifulSoup('<pre>' + html.read() + '</pre>', 'html.parser')
            for title in soup.findAll('title'):
                recursively_translate(title)

            for meta in soup.findAll('meta', {'name':'description'}):
                try:
                    meta['content'] = translate(text=meta['content'], target_language=destination_language)
                except:
                    pass

            for h1 in soup.findAll('h1', {'itemprop':'name'}, class_='den_articol'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(h1)) < end_comment:
                    recursively_translate(h1)

            for p in soup.findAll('p', class_='text_obisnuit'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(p)) < end_comment:
                    recursively_translate(p)

            for p in soup.findAll('p', class_='text_obisnuit2'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(p)) < end_comment:
                    recursively_translate(p)

            for span in soup.findAll('span', class_='text_obisnuit2'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(span)) < end_comment:
                    recursively_translate(span)

            for li in soup.findAll('li', class_='text_obisnuit'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(li)) < end_comment:
                    recursively_translate(li)

            for a in soup.findAll('a', class_='linkMare'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(a)) < end_comment:
                    recursively_translate(a)

            for h4 in soup.findAll('h4', class_='text_obisnuit2'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(h4)) < end_comment:
                    recursively_translate(h4)

            for h5 in soup.findAll('h5', class_='text_obisnuit2'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(h5)) < end_comment:
                    recursively_translate(h5)

            for h1 in soup.findAll('h1', {'itemprop':'name'}, class_='den_webinar'):
                begin_comment = str(soup).index('<!-- ARTICOL START -->')
                end_comment = str(soup).index('<!-- ARTICOL FINAL -->')
                if begin_comment < str(soup).index(str(h1)) < end_comment:
                    recursively_translate(h1)

        print(f'{filename} translated')
        soup = soup.encode(formatter=UnsortedAttributes()).decode('utf-8')
        new_filename = f'{filename.split(".")[0]}_{destination_language}.html'
        if use_translate_folder:
            try:
                with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
                    new_html.write(soup[5:-6])
            except:
                os.mkdir(files_from_folder+r'\translated')
                with open(os.path.join(files_from_folder+r'\translated', new_filename), 'w', encoding='utf-8') as new_html:
                    new_html.write(soup[5:-6])
        else:
            with open(os.path.join(files_from_folder, new_filename), 'w', encoding='utf-8') as html:
                html.write(soup[5:-6])
0

上一篇:

下一篇:

精彩评论

暂无评论...
验证码 换一张
取 消

最新问答

问答排行榜