Logging in to Dajie.com with Python

python  crawler

Dajie.com login

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import time
import json
import sys
import requests
import re

# shared session object so cookies persist across requests
session = requests.session()

# base request headers (the Referer is overridden per request below)
HEADERS = {
    'Referer': 'https://www.dajie.com/',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:51.0) Gecko/20100101 Firefox/51.0',
}

def login(username, passwd):
    login_headers = HEADERS.copy()
    login_headers.update({
        'Referer':'https://www.dajie.com/',
        'x-requested-with':'XMLHttpRequest',
        'Host':'www.dajie.com'
    })
    postData = {
        'captcha': '',
        'email': username,
        'password': passwd,
        'rememberMe': '1'
    }
    # submit the login form (the endpoint returns JSONP via the callback parameter)
    response=session.post('https://www.dajie.com/account/newloginsubmitm?callback=NEW_VERSION_LOGIN_CALLBACK&_CSRFToken=&ajax=1', data=postData, headers=login_headers)
    print(response.content)

    login_headers = HEADERS.copy()
    login_headers.update({
        'Host':'job.dajie.com',
        'Referer':'https://www.dajie.com/'
    })
    # verify the session is logged in
    response=session.get('https://job.dajie.com/auth/checking', headers=login_headers)
    print(response.text)


if __name__ == "__main__":
    username=''
    passwd=''
    login(username, passwd)

Logging in to 58.com with Python

python  crawler  selenium

Log in to 58.com; two versions are provided.

The first simulates typing the username and password.

The second calls 58.com's own JavaScript to compute the login parameters, then submits the login.

# -*- coding: utf-8 -*-
import time, sys, re
import requests
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from PIL import Image

username=''
passwd=''

driver=webdriver.PhantomJS(executable_path='C:\\Python27\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe')
driver.get('https://passport.58.com/login')
time.sleep(2)

# switch to the username/password login tab
pwdLogin=driver.find_element_by_id('pwdLogin')
pwdLogin.click()

# enter the username
usernameUser=driver.find_element_by_id('usernameUser')
usernameUser.send_keys(username)

time.sleep(1)
# click the visible password placeholder so the real input is activated
passwordUserText=driver.find_element_by_id('passwordUserText')
passwordUserText.click()

# enter the password
passwordUser=driver.find_element_by_id('passwordUser')
passwordUser.send_keys(passwd)

# click the login button
btnSubmitUser=driver.find_element_by_id('btnSubmitUser')
btnSubmitUser.click()
time.sleep(3)

''' Collect the cookies held by the WebDriver and join them into one header string '''
dict1_cookie={}
cookie_tmp=[]
for cookie in driver.get_cookies():
    data="{}={}".format(cookie['name'], cookie['value'])
    dict1_cookie[cookie['name']]=cookie['value']
    cookie_tmp.append(data)
_cookie=';'.join(cookie_tmp)


HEADERS={
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0",
    "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language":"zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
    "Accept-Encoding":"gzip, deflate, br",
    "Connection":"keep-alive",
    "Host":"my.58.com",
    "Cookie":_cookie
}

''' Fetch data with requests, reusing the browser cookies '''
session = requests.session()
response=session.get('https://my.58.com/index', headers=HEADERS)
print(response.text)


The second method: call the site's own JS to compute the login parameters.

# -*- coding: utf-8 -*-
import time, sys, re, json
import requests
from selenium import webdriver
from selenium.webdriver import DesiredCapabilities

username=''
passwd=''

''' Set the browser User-Agent for PhantomJS '''
desired_capabilities= DesiredCapabilities.PHANTOMJS.copy()
desired_capabilities['phantomjs.page.customHeaders.User-Agent'] = (
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
)
driver=webdriver.PhantomJS(executable_path='C:\\Python27\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe', desired_capabilities=desired_capabilities)

driver.get('https://passport.58.com/login')
time.sleep(1)

''' Read the RSA public-key parameters from hidden fields on the login page '''
rsaModulus=driver.find_element_by_id('rsaModulus').get_attribute('value')
rsaExponent=driver.find_element_by_id('rsaExponent').get_attribute('value')

''' Encrypt timestamp+password using the site's own encryptString() JS '''
timespan=str(int(round(time.time() * 1000)))
p1_user="return encryptString('{}{}', '{}', '{}')"
encrypt_passwd=driver.execute_script(p1_user.format(timespan, passwd, rsaExponent, rsaModulus))

''' Browser fingerprint and token, computed by the site's own JS '''
Fingerprint2=driver.execute_script('return new Fingerprint2().get()')
getTokenId=driver.execute_script('return getTokenId()')
fingerprint=driver.find_element_by_id('fingerprint').get_attribute('value')

session = requests.session()

headers={
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0",
    "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Origin":"https://passport.58.com",
    'Content-Type':'application/x-www-form-urlencoded',
    "Upgrade-Insecure-Requests":"1",
    'Referer':'https://passport.58.com/login?path=http://my.58.com/?pts=' + str(int(round(time.time() * 1000)))
}

postData={
    "source":"pc-login",
    "path":'http://my.58.com/?pts=' + str(int(round(time.time() * 1000))),
    "password":encrypt_passwd,
    "timesign":'',
    "isremember":"false",
    "callback":"successFun",
    "yzmstate":"",
    "fingerprint":"",
    "finger2":fingerprint,
    "tokenId":getTokenId,
    "username":username,
    "validcode":"",
    "vcodekey":"",
    "btnSubmit":"登录中..."
}

rep=session.post('https://passport.58.com/login/dologin', data=postData, headers=headers)
# the response is JSONP: successFun({...}); pull out the JSON body
match=re.search(r'\((\{.*?\})\)', rep.text)
if match:
    res_json=json.loads(match.group(1))
    print(res_json)
    if res_json['code'] == 0:
        print('Login succeeded!')
    else:
        print(res_json['msg'])


Logging in with browser cookies in Selenium

python  crawler  Selenium

Note that when adding a cookie you must set its path and secure fields. Also, open a page under the same domain first, then add the cookies, and only then open the target page, as in the sketch below.
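
A minimal sketch of that flow, assuming PhantomJS (as in the other posts here) and using my.58.com only as a stand-in domain; the cookie name and value are placeholders:

# -*- coding: utf-8 -*-
from selenium import webdriver

driver = webdriver.PhantomJS()

# 1. open any page under the target domain first, so add_cookie
#    is allowed to set cookies for that domain
driver.get('https://my.58.com/')

# 2. add the saved cookies, including the path and secure fields
driver.add_cookie({
    'name': 'session_id',   # placeholder cookie name
    'value': 'xxxx',        # placeholder value
    'domain': '.58.com',
    'path': '/',
    'secure': False,
})

# 3. only now open the page that requires login
driver.get('https://my.58.com/index')
print(driver.page_source)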

Automated login to the 51job enterprise edition with Python

python  crawler

1. 51job's captcha is an image whose tiles are shuffled; the ordering information in the HTML is needed to reassemble it.

2. After reassembling, pick the coordinates indicated by the captcha prompt; they can be inspected in the browser console. A reassembly sketch follows.
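
A hedged sketch of the reassembly step, assuming the tile order has already been parsed out of the HTML; the order list and file names below are hypothetical:

# -*- coding: utf-8 -*-
from PIL import Image

# hypothetical: display position i shows source tile order[i],
# as recovered from the page's HTML
order = [3, 0, 2, 1]

img = Image.open('captcha.png')   # the shuffled captcha image
w, h = img.size
tile_w = w // len(order)

# paste each source tile into its correct display position
fixed = Image.new('RGB', (w, h))
for dst, src in enumerate(order):
    tile = img.crop((src * tile_w, 0, (src + 1) * tile_w, h))
    fixed.paste(tile, (dst * tile_w, 0))
fixed.save('captcha_fixed.png')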


Logging in to Lagou with Python

python  crawler

#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
import time
import json
import sys
import subprocess
import requests
import hashlib
import re

# shared session object so cookies persist across requests
session = requests.session()

# base request headers
HEADERS = {
    'Referer': 'https://passport.lagou.com/login/login.html',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:51.0) Gecko/20100101 Firefox/51.0',
}

def encrypt_password(passwd):
    '''Double-MD5 the password; 'veenike' is a hard-coded salt found in the site's JS file'''
    passwd = hashlib.md5(passwd.encode('utf-8')).hexdigest()
    passwd = 'veenike'+passwd+'veenike'
    passwd = hashlib.md5(passwd.encode('utf-8')).hexdigest()
    return passwd

def get_token():
    '''Pull the anti-forgery token and code out of the login page HTML.'''
    login_page = 'https://passport.lagou.com/login/login.html'
    data = session.get(login_page, headers=HEADERS)
    X_Anti_Forge_Token=re.search(r'(\w+\-\w+\-\w+\-\w+\-\w+)', data.text)
    X_Anti_Forge_Code=re.search(r'X_Anti_Forge_Code.*?\'(\d+)\'', data.text)
    return (X_Anti_Forge_Token.group(1), X_Anti_Forge_Code.group(1))

def login(username, passwd):
    X_Anti_Forge_Token,X_Anti_Forge_Code=get_token()
    login_headers = HEADERS.copy()
    login_headers.update({
        'X-Requested-With': 'XMLHttpRequest',
        # 'Anit' is the site's own misspelling of these header names
        'X-Anit-Forge-Token': X_Anti_Forge_Token,
        'X-Anit-Forge-Code': X_Anti_Forge_Code,
    })
    postData = {
        'isValidate': 'true',
        'username': username,
        'password': encrypt_password(passwd),
        'request_form_verifyCode': '',
        'submit': '',
    }
    response=session.post('https://passport.lagou.com/login/login.json', data=postData, headers=login_headers)
    print(response.content)

    # drop the AJAX/login-only headers before requesting ordinary pages
    del login_headers['Referer']
    del login_headers['X-Requested-With']
    del login_headers['X-Anit-Forge-Token']
    del login_headers['X-Anit-Forge-Code']

    req = session.get('https://easy.lagou.com/dashboard/index.htm?from=gray', headers=login_headers)
    print(req.content)

if __name__ == "__main__":
    username=''
    passwd=''
    login(username, passwd)

Crawling the Laosiji blog with Scrapy

scrapy  crawler  python  Rule

Scrapy crawls the Laosiji blog, using a Rule for pagination; a sketch of the pattern follows.
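
A minimal sketch of the Rule-based pagination pattern; the domain and link patterns are placeholders, not the original spider's:

# -*- coding: utf-8 -*-
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor

class BlogSpider(CrawlSpider):
    name = 'blog'
    start_urls = ['http://blog.example.com/']

    rules = (
        # follow pagination links (no callback, so they are just followed)
        Rule(LinkExtractor(restrict_css='.pagination')),
        # parse article pages
        Rule(LinkExtractor(allow=r'/post/\d+'), callback='parse_post'),
    )

    def parse_post(self, response):
        yield {
            'title': response.css('h1::text').extract_first(),
            'url': response.url,
        }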

Crawling Zhihu user info with Scrapy

crawler  xpath

Reposted from https://github.com/Germey/Zhihu/blob/master/zhihuuser/spiders/zhihu.py

Common XPath syntax

scrapy  crawler  python

Common XPath syntax; a few frequent patterns are sketched below. Reference:

http://www.w3school.com.cn/xpath/xpath_syntax.asp
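
The most common patterns, shown here with lxml (any XPath engine behaves the same):

# -*- coding: utf-8 -*-
from lxml import etree

html = etree.HTML('<div id="main"><a href="/a" class="x">A</a><a href="/b">B</a></div>')

print(html.xpath('//a'))                     # all <a> elements, anywhere
print(html.xpath('//a/@href'))               # attribute values
print(html.xpath('//a[@class="x"]/text()'))  # filter by attribute
print(html.xpath('//div[@id="main"]/a[1]'))  # first child (XPath is 1-indexed)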

Scrapy auto-pagination crawl of blog content

selenium  crawler  python

Scrapy crawls blog content with automatic pagination.

Common Selenium usage

selenium  crawler  python

Common Selenium usage, originally from http://www.cnblogs.com/luxiaojun/p/6144748.html; a quick tour is sketched below.
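
A quick tour in the era-appropriate find_element_by_* style used throughout these posts; the element ids are placeholders for whatever the target page uses:

# -*- coding: utf-8 -*-
from selenium import webdriver

driver = webdriver.PhantomJS()
driver.get('http://www.example.com')

box = driver.find_element_by_id('kw')   # locate by id (placeholder id)
box.send_keys(u'python')                # type into the input
driver.find_element_by_xpath('//button[@type="submit"]').click()  # locate by XPath, click

print(driver.title)                 # page title
print(driver.current_url)           # URL after the click
driver.save_screenshot('page.png')  # screenshot (useful with headless PhantomJS)
driver.quit()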

Automated login with Selenium

selenium  crawler  python  captcha-solving

Python 2.7 + Selenium, using the Damatu (打码兔) captcha service to log in to the phpcms admin backend automatically.

Simulating search input and clicks with Selenium

selenium  crawler  python

Simulate typing a query and clicking the search button with Selenium.

Fetching scroll-loaded data with Python Selenium

selenium  crawler  python

Fetching a page directly with urllib2 returns only the initial HTML, so content loaded on scroll (such as mobile-style infinite scrolling) is missing; Selenium can scroll the page to trigger those loads.

Scrolling to the bottom dynamically with Selenium

selenium  crawler  python

Selenium scrolls to the bottom of the page repeatedly to fetch dynamically loaded data, as in the sketch below.
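
A hedged sketch of the scroll loop: keep scrolling until the document height stops growing, which signals that no more content is being appended (the URL is a placeholder):

# -*- coding: utf-8 -*-
import time
from selenium import webdriver

driver = webdriver.PhantomJS()
driver.get('http://www.example.com/feed')  # placeholder URL

last_height = driver.execute_script('return document.body.scrollHeight')
while True:
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
    time.sleep(2)  # give the AJAX requests time to append content
    new_height = driver.execute_script('return document.body.scrollHeight')
    if new_height == last_height:
        break  # height stopped growing: nothing more to load
    last_height = new_height

print(len(driver.page_source))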

Generating rules with GooSeeker in Python

gooseeker  crawler  python

Generate crawl rules with GooSeeker, then parse the data automatically in Python.

Auto-login crawling with Scrapy

scrapy  crawler  python

Scrapy logs in automatically and crawls WKINFO data; a minimal login-then-crawl sketch follows.
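
A minimal sketch using scrapy.FormRequest.from_response; the URLs and form field names are placeholders, not WKINFO's actual ones:

# -*- coding: utf-8 -*-
import scrapy

class LoginSpider(scrapy.Spider):
    name = 'login_demo'
    start_urls = ['http://www.example.com/login']  # placeholder login page

    def parse(self, response):
        # fill in the login form found on the page and submit it
        return scrapy.FormRequest.from_response(
            response,
            formdata={'username': 'user', 'password': 'pass'},
            callback=self.after_login,
        )

    def after_login(self, response):
        # the session cookie is reused automatically on later requests
        if 'logout' in response.text:
            yield scrapy.Request('http://www.example.com/data',
                                 callback=self.parse_data)

    def parse_data(self, response):
        yield {'title': response.css('title::text').extract_first()}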

Damatu auto-captcha for Python 2.7

crawler  python  Damatu

The Damatu auto-captcha API officially ships only a Python 3.2 client; this version is modified to support Python 2.7.

Scraping Shenzhen/Dongguan social-insurance backend accounts with Python

python  captcha-solving  auto-login

Python logs in automatically, solves captchas automatically (via the Lianzhong platform), and scrapes social-insurance backend accounts. This version covers only the Shenzhen and Dongguan bureaus; scraped data is saved automatically, but you should tweak the data-writing part yourself.