Logging in to Dajie.com with Python

python  crawler  

 Log in to Dajie.com.

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import time
import json
import sys
import requests
import re

# session object shared by all requests
session = requests.session()

# common request headers
HEADERS = {
    'Referer': 'https://www.dajie.com/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:51.0) Gecko/20100101 Firefox/51.0',
}

def login(username, passwd):
    login_headers = HEADERS.copy()
    login_headers.update({
        'Referer': 'https://www.dajie.com/',
        'x-requested-with': 'XMLHttpRequest',
        'Host': 'www.dajie.com'
    })
    postData = {
        'captcha': '',
        'email': username,
        'password': passwd,
        'rememberMe': '1'
    }
    response = session.post('https://www.dajie.com/account/newloginsubmitm?callback=NEW_VERSION_LOGIN_CALLBACK&_CSRFToken=&ajax=1', data=postData, headers=login_headers)
    print(response.content)

    login_headers = HEADERS.copy()
    login_headers.update({
        'Host': 'job.dajie.com',
        'Referer': 'https://www.dajie.com/'
    })
    response = session.get('https://job.dajie.com/auth/checking', headers=login_headers)
    print(response.text)


if __name__ == "__main__":
    username=''
    passwd=''
    login(username, passwd)
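
The login endpoint is requested with callback=NEW_VERSION_LOGIN_CALLBACK, so the body comes back as JSONP rather than plain JSON. A minimal sketch for unwrapping it before inspecting the result (the field names inside the payload are an assumption):

import json
import re

def unwrap_jsonp(body):
    # strip a wrapper like NEW_VERSION_LOGIN_CALLBACK({...}) and parse the JSON inside
    match = re.search(r'\((\{.*\})\)', body, re.S)
    return json.loads(match.group(1)) if match else None

# e.g. result = unwrap_jsonp(response.text)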

Logging in to 58.com with Python

python  crawler  selenium  

Log in to 58.com; two versions are provided.

Version 1 simulates typing the username and password to log in.

Version 2 calls 58.com's own JavaScript to compute the login parameters, then submits the login request directly.

# -*- coding: utf-8 -*-
import time, sys, re
import requests
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from PIL import Image

username=''
passwd=''

driver=webdriver.PhantomJS(executable_path='C:\\Python27\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe')
driver.get('https://passport.58.com/login')
time.sleep(2)

pwdLogin=driver.find_element_by_id('pwdLogin')
pwdLogin.click()

# type the username
usernameUser=driver.find_element_by_id('usernameUser')
usernameUser.send_keys(username)

time.sleep(1)
passwordUserText=driver.find_element_by_id('passwordUserText')
passwordUserText.click()

# type the password
passwordUser=driver.find_element_by_id('passwordUser')
passwordUser.send_keys(passwd)

# click the login button
btnSubmitUser=driver.find_element_by_id('btnSubmitUser')
btnSubmitUser.click()
time.sleep(3)

''' collect the driver's cookies '''
dict1_cookie={}
cookie_tmp=[]
for cookie in driver.get_cookies():
    data="{}={}".format(cookie['name'], cookie['value'])
    dict1_cookie[cookie['name']]=cookie['value']
    cookie_tmp.append(data)
_cookie=';'.join(cookie_tmp)


HEADERS={
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0",
    "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language":"zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
    "Accept-Encoding":"gzip, deflate, br",
    "Connection":"keep-alive",
    "Host":"my.58.com",
    "Cookie":_cookie
}

''' fetch data using the captured cookies '''
session = requests.session()
response=session.get('https://my.58.com/index', headers=HEADERS)
print(response.text)
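
Instead of hand-building the Cookie header, the driver's cookies can also be loaded straight into the requests session; a sketch of that variant (drop the "Cookie" key from HEADERS if you use it):

session = requests.session()
for cookie in driver.get_cookies():
    # transfer each Selenium cookie into the requests cookie jar
    session.cookies.set(cookie['name'], cookie['value'])
response = session.get('https://my.58.com/index', headers=HEADERS)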


Version 2: call the site's own JavaScript to simulate the login.

# -*- coding: utf-8 -*-
import time, sys, re, json
import requests
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities

username=''
passwd=''

''' set the browser's User-Agent '''
desired_capabilities= DesiredCapabilities.PHANTOMJS.copy()
desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
)
driver=webdriver.PhantomJS(executable_path='C:\\Python27\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe', desired_capabilities=desired_capabilities)

driver.get('https://passport.58.com/login')
time.sleep(1)

''' read the RSA parameters embedded in the login page '''
rsaModulus=driver.find_element_by_id('rsaModulus').get_attribute('value')
rsaExponent=driver.find_element_by_id('rsaExponent').get_attribute('value')

''' encrypt the password with the page's own encryptString() '''
timespan=str(int(round(time.time() * 1000)))
p1_user="return encryptString('{}{}', '{}', '{}')"
encrypt_passwd=driver.execute_script(p1_user.format(timespan, passwd, rsaExponent, rsaModulus))

Fingerprint2=driver.execute_script('return new Fingerprint2().get()')
getTokenId=driver.execute_script('return getTokenId()')
fingerprint=driver.find_element_by_id('fingerprint').get_attribute('value')

session = requests.session()

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Origin": "https://passport.58.com",
    "Content-Type": "application/x-www-form-urlencoded",
    "Upgrade-Insecure-Requests": "1",
    "Referer": "https://passport.58.com/login?path=http://my.58.com/?pts=" + str(int(round(time.time() * 1000)))
}

postData={
    "source":"pc-login",
    "path":'http://my.58.com/?pts=' + str(int(round(time.time() * 1000))),
    "password":encrypt_passwd,
    "timesign":'',
    "isremember":"false",
    "callback":"successFun",
    "yzmstate":"",
    "fingerprint":"",
    "finger2":fingerprint,
    "tokenId":getTokenId,
    "username":username,
    "validcode":"",
    "vcodekey":"",
    "btnSubmit":"登录中..."
}

rep=session.post('https://passport.58.com/login/dologin', data=postData, headers=headers)
match = re.search(r'\((\{.*?\})\)', rep.text)
if match:
    res_json=json.loads(match.group(1))
    print(res_json)
    if res_json['code'] == 0:
        print('Login succeeded!')
    else:
        print(res_json['msg'])


Logging in with browser cookies in Selenium

python  crawler  Selenium  

Note that when adding cookies you must set the path and secure fields. Also, you must first open a page under the same domain, then add the cookies, and only then open the target page.
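
A minimal sketch of that order of operations (the cookie name, value, and URLs here are placeholders):

from selenium import webdriver

driver = webdriver.Firefox()

# 1. open any page under the target domain first; add_cookie is rejected otherwise
driver.get('https://example.com/')

# 2. add the cookies, setting path and secure explicitly
driver.add_cookie({
    'name': 'sessionid',   # placeholder cookie name
    'value': 'xxxx',       # placeholder value
    'path': '/',
    'secure': True,
})

# 3. only now open the page that requires the login state
driver.get('https://example.com/dashboard')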

Logging in to Lagou.com with Python

python  crawler  

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import time
import json
import sys
import subprocess
import requests
import hashlib
import re

# session object shared by all requests
session = requests.session()

# common request headers
HEADERS = {
    'Referer': 'https://passport.lagou.com/login/login.html',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:51.0) Gecko/20100101 Firefox/51.0',
}

def encrypt_password(passwd):
    ''' double-MD5 the password; 'veenike' is a hard-coded salt found in Lagou's js file '''
    passwd = hashlib.md5(passwd.encode('utf-8')).hexdigest()
    passwd = 'veenike'+passwd+'veenike'
    passwd = hashlib.md5(passwd.encode('utf-8')).hexdigest()
    return passwd

def get_token():
    login_page = 'https://passport.lagou.com/login/login.html'
    data = session.get(login_page, headers=HEADERS)
    X_Anti_Forge_Token = re.search(r'(\w+\-\w+\-\w+\-\w+\-\w+)', data.text)
    X_Anti_Forge_Code = re.search(r'X_Anti_Forge_Code.*?\'(\d+)\'', data.text)
    return (X_Anti_Forge_Token.group(1), X_Anti_Forge_Code.group(1))

def login(username, passwd):
    X_Anti_Forge_Token,X_Anti_Forge_Code=get_token()
    login_headers = HEADERS.copy()
    login_headers.update({'X-Requested-With':'XMLHttpRequest','X-Anit-Forge-Token':X_Anti_Forge_Token,'X-Anit-Forge-Code':X_Anti_Forge_Code})
    postData = {
            'isValidate' : 'true',
            'username' : username,
            'password': encrypt_password(passwd),
            'request_form_verifyCode': '',
            'submit': '',
        }
    response=session.post('https://passport.lagou.com/login/login.json', data=postData, headers=login_headers)
    print(response.content)

    del login_headers['Referer']
    del login_headers['X-Requested-With']
    del login_headers['X-Anit-Forge-Token']
    del login_headers['X-Anit-Forge-Code']

    req = session.get('https://easy.lagou.com/dashboard/index.htm?from=gray', headers=login_headers)
    print(req.content)

if __name__ == "__main__":
    username=''
    passwd=''
    login(username, passwd)

Crawling the Laosiji blog with Scrapy

scrapy  crawler  python  Rule  

Use Scrapy's Rule-based link extraction to handle pagination and crawl the Laosiji blog.
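
A minimal sketch of the Rule-based pagination pattern (the spider name, domain, and selectors below are placeholders, not the post's actual rules):

import scrapy
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class BlogSpider(CrawlSpider):
    name = 'blog'                          # placeholder name
    start_urls = ['https://example.com/']  # placeholder domain

    rules = (
        # follow every "next page" link and keep crawling
        Rule(LinkExtractor(restrict_css='a.next-page'), follow=True),
        # hand article pages to the callback
        Rule(LinkExtractor(allow=r'/post/\d+'), callback='parse_post'),
    )

    def parse_post(self, response):
        yield {
            'title': response.css('h1::text').get(),
            'url': response.url,
        }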

Crawling Zhihu user profiles with Scrapy

crawler  xpath  

Reposted from https://github.com/Germey/Zhihu/blob/master/zhihuuser/spiders/zhihu.py

Common XPath syntax

scrapy  crawler  python  

Common XPath syntax.

http://www.w3school.com.cn/xpath/xpath_syntax.asp
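
A few of the common patterns the linked reference covers, tried against a small document with lxml:

from lxml import etree

html = etree.HTML('<div><a href="/a" class="nav">one</a><a href="/b">two</a></div>')

print(html.xpath('//a'))                       # every <a> element at any depth
print(html.xpath('//a/@href'))                 # attribute values: ['/a', '/b']
print(html.xpath('//a[@class="nav"]/text()'))  # text of the <a> with class "nav": ['one']
print(html.xpath('//div/a[2]/text()'))         # the second <a> child: ['two']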

Automatic pagination with Scrapy when crawling blog posts

selenium  crawler  python  

Automatically follow pagination with Scrapy to crawl blog content.

Common Selenium syntax

selenium  crawler  python  

Common Selenium syntax; original article: http://www.cnblogs.com/luxiaojun/p/6144748.html
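
A short sample of the kinds of calls that reference walks through (the URL and element ids are placeholders; the old find_element_by_* API matches the scripts above):

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()
driver.get('https://example.com/')         # placeholder URL

elem = driver.find_element_by_id('query')  # locate an element by id
elem.send_keys('selenium')                 # type text into it
elem.submit()                              # submit the surrounding form

# explicit wait until an element shows up
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, 'results')))

print(driver.title)
driver.quit()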

Automated login with Selenium

selenium  crawler  python  captcha  

Python 2.7 + Selenium + the 打码兔 captcha-solving service to log in to a phpcms admin backend automatically.

Simulating search input and clicking the search button with Selenium

selenium  crawler  python  

Use Selenium to type a query and click the search button.

Scrolling to the bottom of a dynamic page with Selenium

selenium  crawler  python  

Scroll to the bottom of the page with Selenium so that dynamically loaded data gets fetched.
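
A common sketch of that scroll loop, reusing a driver like the ones above; it stops once the page height no longer grows:

import time

last_height = driver.execute_script('return document.body.scrollHeight')
while True:
    # jump to the bottom to trigger the next batch of lazy-loaded content
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
    time.sleep(2)  # give the AJAX requests time to finish
    new_height = driver.execute_script('return document.body.scrollHeight')
    if new_height == last_height:  # nothing new loaded, stop scrolling
        break
    last_height = new_height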

Generating rules with GooSeeker from Python

gooseeker  crawler  python  

Generate crawl rules with GooSeeker, then parse the data automatically with Python.

Automatic login and crawling with Scrapy

scrapy  crawler  python  

Log in automatically with Scrapy and crawl WKINFO data.

Scraping Shenzhen/Dongguan social-insurance backend accounts with Python

python  captcha  auto-login  

Automatically log in, solve captchas via the Lianzhong (联众) captcha platform, and scrape accounts from the social-insurance backend. This version only covers the Shenzhen and Dongguan social-insurance bureaus; scraped data is saved automatically, but you will need to tweak the data-writing part yourself.