mirror of
https://bitbucket.org/wisemapping/wisemapping-open-source.git
synced 2024-11-22 22:27:55 +01:00
- Add Mediapartners-Google AdSense crawler
This commit is contained in:
parent
c4c3f30303
commit
6008376ad5
@ -1,343 +1,344 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright [2011] [wisemapping]
|
* Copyright [2011] [wisemapping]
|
||||||
*
|
*
|
||||||
* Licensed under WiseMapping Public License, Version 1.0 (the "License").
|
* Licensed under WiseMapping Public License, Version 1.0 (the "License").
|
||||||
* It is basically the Apache License, Version 2.0 (the "License") plus the
|
* It is basically the Apache License, Version 2.0 (the "License") plus the
|
||||||
* "powered by wisemapping" text requirement on every single page;
|
* "powered by wisemapping" text requirement on every single page;
|
||||||
* you may not use this file except in compliance with the License.
|
* you may not use this file except in compliance with the License.
|
||||||
* You may obtain a copy of the license at
|
* You may obtain a copy of the license at
|
||||||
*
|
*
|
||||||
* http://www.wisemapping.org/license
|
* http://www.wisemapping.org/license
|
||||||
*
|
*
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package com.wisemapping.filter;
|
package com.wisemapping.filter;
|
||||||
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
|
||||||
public class UserAgent implements Serializable {
|
public class UserAgent implements Serializable {
|
||||||
public static final String USER_AGENT_HEADER = "User-Agent";
|
public static final String USER_AGENT_HEADER = "User-Agent";
|
||||||
private int versionMajor = -1;
|
private int versionMajor = -1;
|
||||||
private int versionVariation = -1;
|
private int versionVariation = -1;
|
||||||
private Product product;
|
private Product product;
|
||||||
private OS os;
|
private OS os;
|
||||||
private final org.apache.commons.logging.Log logger = LogFactory.getLog(UserAgent.class.getName());
|
private final org.apache.commons.logging.Log logger = LogFactory.getLog(UserAgent.class.getName());
|
||||||
private boolean hasGCFInstalled = false;
|
private boolean hasGCFInstalled = false;
|
||||||
|
|
||||||
public static void main(final String argv[]) {
|
public static void main(final String argv[]) {
|
||||||
UserAgent explorer = UserAgent.create("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)");
|
UserAgent explorer = UserAgent.create("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)");
|
||||||
// UserAgent firefox = UserAgent.create("Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20050302 Firefox/0.9.6");
|
// UserAgent firefox = UserAgent.create("Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20050302 Firefox/0.9.6");
|
||||||
UserAgent safari = UserAgent.create("iCab/2.9.5 (Macintosh; U; PPC; Mac OS X)");
|
UserAgent safari = UserAgent.create("iCab/2.9.5 (Macintosh; U; PPC; Mac OS X)");
|
||||||
UserAgent opera = UserAgent.create("Opera/9.21 (Windows NT 5.1; U; en)");
|
UserAgent opera = UserAgent.create("Opera/9.21 (Windows NT 5.1; U; en)");
|
||||||
|
|
||||||
|
|
||||||
UserAgent firefox = UserAgent.create("Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20050302 Firefox/1.9.6");
|
UserAgent firefox = UserAgent.create("Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20050302 Firefox/1.9.6");
|
||||||
assert firefox.isBrowserSupported();
|
assert firefox.isBrowserSupported();
|
||||||
|
|
||||||
|
|
||||||
firefox = UserAgent.create("Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.7) Gecko/20070914 Firefox/2.0.0.7 Creative ZENcast v1.02.08 FirePHP/0.0.5.13");
|
firefox = UserAgent.create("Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.7) Gecko/20070914 Firefox/2.0.0.7 Creative ZENcast v1.02.08 FirePHP/0.0.5.13");
|
||||||
assert firefox.isBrowserSupported();
|
assert firefox.isBrowserSupported();
|
||||||
|
|
||||||
firefox = UserAgent.create("Mozilla/5.0 (X11; U; Linux i686; es-ES; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12");
|
firefox = UserAgent.create("Mozilla/5.0 (X11; U; Linux i686; es-ES; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12");
|
||||||
assert firefox.isBrowserSupported();
|
assert firefox.isBrowserSupported();
|
||||||
|
|
||||||
firefox = UserAgent.create("'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.7) Gecko/20070914 firefox 2.0'");
|
firefox = UserAgent.create("'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.7) Gecko/20070914 firefox 2.0'");
|
||||||
assert firefox.isBrowserSupported();
|
assert firefox.isBrowserSupported();
|
||||||
|
|
||||||
firefox = UserAgent.create("Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.12) Gecko/20080129 Iceweasel/2.0.0.12 (Debian-2.0.0.12-0etch1)");
|
firefox = UserAgent.create("Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.12) Gecko/20080129 Iceweasel/2.0.0.12 (Debian-2.0.0.12-0etch1)");
|
||||||
assert firefox.isBrowserSupported();
|
assert firefox.isBrowserSupported();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean isVersionGreatedOrEqualThan(final int mayor, final int variation) {
|
public boolean isVersionGreatedOrEqualThan(final int mayor, final int variation) {
|
||||||
return this.versionMajor > mayor || (mayor == this.versionMajor && this.versionVariation >= variation);
|
return this.versionMajor > mayor || (mayor == this.versionMajor && this.versionVariation >= variation);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isVersionLessThan(final int mayor) {
|
public boolean isVersionLessThan(final int mayor) {
|
||||||
return this.versionMajor < mayor;
|
return this.versionMajor < mayor;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getVersionMajor() {
|
public int getVersionMajor() {
|
||||||
return versionMajor;
|
return versionMajor;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getVersionVariation() {
|
public int getVersionVariation() {
|
||||||
return versionVariation;
|
return versionVariation;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Product getProduct() {
|
public Product getProduct() {
|
||||||
return product;
|
return product;
|
||||||
}
|
}
|
||||||
|
|
||||||
public OS getOs() {
|
public OS getOs() {
|
||||||
return os;
|
return os;
|
||||||
}
|
}
|
||||||
|
|
||||||
public enum Product {
|
public enum Product {
|
||||||
EXPLORER, FIREFOX, CAMINO, NETSCAPE, OPERA, SAFARI, CHROME, KONQUEOR, KMELEON, MOZILLA, LYNX, ROBOT, WEB_CRAWLER
|
EXPLORER, FIREFOX, CAMINO, NETSCAPE, OPERA, SAFARI, CHROME, KONQUEOR, KMELEON, MOZILLA, LYNX, ROBOT, WEB_CRAWLER
|
||||||
}
|
}
|
||||||
|
|
||||||
public enum OS {
|
public enum OS {
|
||||||
WINDOWS, LINUX, MAC, KNOWN
|
WINDOWS, LINUX, MAC, KNOWN
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private UserAgent(final String header) {
|
private UserAgent(final String header) {
|
||||||
parse(header);
|
parse(header);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void parse(String userAgentHeader) {
|
private void parse(String userAgentHeader) {
|
||||||
// Format ApplicationName/ApplicationVersion ();
|
// Format ApplicationName/ApplicationVersion ();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
int detailStart = userAgentHeader.indexOf('(');
|
int detailStart = userAgentHeader.indexOf('(');
|
||||||
int detailEnd = userAgentHeader.indexOf(')');
|
int detailEnd = userAgentHeader.indexOf(')');
|
||||||
|
|
||||||
// Parse base format = application (productDetails) productAddition
|
// Parse base format = application (productDetails) productAddition
|
||||||
String application = userAgentHeader.substring(0, detailStart);
|
String application = userAgentHeader.substring(0, detailStart);
|
||||||
application = application.trim();
|
application = application.trim();
|
||||||
|
|
||||||
String productDetails = userAgentHeader.substring(detailStart + 1, detailEnd);
|
String productDetails = userAgentHeader.substring(detailStart + 1, detailEnd);
|
||||||
productDetails = productDetails.trim();
|
productDetails = productDetails.trim();
|
||||||
|
|
||||||
String productAddition = userAgentHeader.substring(detailEnd + 1, userAgentHeader.length());
|
String productAddition = userAgentHeader.substring(detailEnd + 1, userAgentHeader.length());
|
||||||
productAddition = productAddition.trim();
|
productAddition = productAddition.trim();
|
||||||
|
|
||||||
this.os = parseOS(productDetails);
|
this.os = parseOS(productDetails);
|
||||||
|
|
||||||
if (userAgentHeader.contains("Googlebot")) {
|
if (userAgentHeader.contains("Googlebot") || userAgentHeader.contains("Mediapartners-Google")) {
|
||||||
//"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
|
//"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
|
||||||
this.product = Product.WEB_CRAWLER;
|
// Mediapartners-Google -> Add sense robot
|
||||||
|
this.product = Product.WEB_CRAWLER;
|
||||||
} else if (userAgentHeader.contains("MSIE")) {
|
|
||||||
// Explorer Browser : http://msdn2.microsoft.com/en-us/library/ms537503.aspx
|
} else if (userAgentHeader.contains("MSIE")) {
|
||||||
// Format: Mozilla/MozVer (compatible; MSIE IEVer[; Provider]; Platform[; Extension]*) [Addition]
|
// Explorer Browser : http://msdn2.microsoft.com/en-us/library/ms537503.aspx
|
||||||
// SampleTest: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; Google Wireless Transcoder;)
|
// Format: Mozilla/MozVer (compatible; MSIE IEVer[; Provider]; Platform[; Extension]*) [Addition]
|
||||||
|
// SampleTest: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; Google Wireless Transcoder;)
|
||||||
// Parse version ...
|
|
||||||
int index = productDetails.indexOf("MSIE") + 4;
|
// Parse version ...
|
||||||
int lastIndex = productDetails.indexOf(';', index);
|
int index = productDetails.indexOf("MSIE") + 4;
|
||||||
|
int lastIndex = productDetails.indexOf(';', index);
|
||||||
final String versionStr = productDetails.substring(index + 1, lastIndex);
|
|
||||||
parseVersion(versionStr);
|
final String versionStr = productDetails.substring(index + 1, lastIndex);
|
||||||
|
parseVersion(versionStr);
|
||||||
// Explorer Parse ...
|
|
||||||
this.product = Product.EXPLORER;
|
// Explorer Parse ...
|
||||||
this.hasGCFInstalled = productDetails.contains("chromeframe");
|
this.product = Product.EXPLORER;
|
||||||
} else if (userAgentHeader.contains("iCab") || userAgentHeader.contains("Safari")) {
|
this.hasGCFInstalled = productDetails.contains("chromeframe");
|
||||||
// Safari:
|
} else if (userAgentHeader.contains("iCab") || userAgentHeader.contains("Safari")) {
|
||||||
//Formats: Mozilla/5.0 (Windows; U; Windows NT 5.1; en) AppleWebKit/522.13.1 (KHTML, like Gecko) Version/3.0.2 Safari/522.13.1
|
// Safari:
|
||||||
//Chrome:
|
//Formats: Mozilla/5.0 (Windows; U; Windows NT 5.1; en) AppleWebKit/522.13.1 (KHTML, like Gecko) Version/3.0.2 Safari/522.13.1
|
||||||
//Formats: "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.44 Safari/534.7"
|
//Chrome:
|
||||||
String versionStr = "";
|
//Formats: "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.517.44 Safari/534.7"
|
||||||
if (userAgentHeader.contains("Chrome")) {
|
String versionStr = "";
|
||||||
this.product = Product.CHROME;
|
if (userAgentHeader.contains("Chrome")) {
|
||||||
versionStr = userAgentHeader.substring(userAgentHeader.indexOf("Chrome") + 7, userAgentHeader.lastIndexOf(" "));
|
this.product = Product.CHROME;
|
||||||
} else {
|
versionStr = userAgentHeader.substring(userAgentHeader.indexOf("Chrome") + 7, userAgentHeader.lastIndexOf(" "));
|
||||||
this.product = Product.SAFARI;
|
} else {
|
||||||
versionStr = userAgentHeader.substring(userAgentHeader.indexOf("Version") + 8, userAgentHeader.lastIndexOf(" "));
|
this.product = Product.SAFARI;
|
||||||
}
|
versionStr = userAgentHeader.substring(userAgentHeader.indexOf("Version") + 8, userAgentHeader.lastIndexOf(" "));
|
||||||
|
}
|
||||||
parseVersion(versionStr);
|
|
||||||
|
parseVersion(versionStr);
|
||||||
} else if (userAgentHeader.contains("Konqueror")) {
|
|
||||||
this.product = Product.KONQUEOR;
|
} else if (userAgentHeader.contains("Konqueror")) {
|
||||||
} else if (userAgentHeader.contains("KMeleon")) {
|
this.product = Product.KONQUEOR;
|
||||||
this.product = Product.KMELEON;
|
} else if (userAgentHeader.contains("KMeleon")) {
|
||||||
} else if (userAgentHeader.contains("Gecko")) {
|
this.product = Product.KMELEON;
|
||||||
// Firefox/Mozilla/Camino:
|
} else if (userAgentHeader.contains("Gecko")) {
|
||||||
// Mozilla/MozVer (Platform; Security; SubPlatform; Language; rv:Revision[; Extension]*) Gecko/GeckVer [Product/ProdVer]
|
// Firefox/Mozilla/Camino:
|
||||||
// SampleTest: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20050302 Firefox/0.9.6
|
// Mozilla/MozVer (Platform; Security; SubPlatform; Language; rv:Revision[; Extension]*) Gecko/GeckVer [Product/ProdVer]
|
||||||
// Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.7) Gecko/20070914 Firefox/2.0.0.7 Creative ZENcast v1.02.08 FirePHP/0.0.5.13
|
// SampleTest: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20050302 Firefox/0.9.6
|
||||||
// 'Mozilla/5.0 (Windows; U; Windows NT 5.1; es-ES; rv:1.7.12) Gecko/20050915'
|
// Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.8.1.7) Gecko/20070914 Firefox/2.0.0.7 Creative ZENcast v1.02.08 FirePHP/0.0.5.13
|
||||||
// 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.7) Gecko/20070914 firefox 2.0'
|
// 'Mozilla/5.0 (Windows; U; Windows NT 5.1; es-ES; rv:1.7.12) Gecko/20050915'
|
||||||
// 'Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv:1.8.1.6) Gecko/20060601 Firefox/2.0.0.6 (Ubuntu-edgy)'
|
// 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.7) Gecko/20070914 firefox 2.0'
|
||||||
// 'Mozilla/5.0 (X11; U; Linux i686; es-ES; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12'
|
// 'Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv:1.8.1.6) Gecko/20060601 Firefox/2.0.0.6 (Ubuntu-edgy)'
|
||||||
// "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.12) Gecko/20080129 Iceweasel/2.0.0.12 (Debian-2.0.0.12-0etch1)
|
// 'Mozilla/5.0 (X11; U; Linux i686; es-ES; rv:1.8.0.12) Gecko/20070731 Ubuntu/dapper-security Firefox/1.5.0.12'
|
||||||
|
// "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.12) Gecko/20080129 Iceweasel/2.0.0.12 (Debian-2.0.0.12-0etch1)
|
||||||
// Remove gecko string
|
|
||||||
// Debian Firefox is remamed to iceweasel.
|
// Remove gecko string
|
||||||
productAddition = productAddition.replace("Iceweasel", "firefox");
|
// Debian Firefox is remamed to iceweasel.
|
||||||
|
productAddition = productAddition.replace("Iceweasel", "firefox");
|
||||||
|
|
||||||
productAddition = productAddition.substring(productAddition.indexOf(' ') + 1);
|
|
||||||
productAddition = productAddition.toLowerCase();
|
productAddition = productAddition.substring(productAddition.indexOf(' ') + 1);
|
||||||
String prodDesc = null;
|
productAddition = productAddition.toLowerCase();
|
||||||
if (productAddition.contains("firefox")) {
|
String prodDesc = null;
|
||||||
this.product = Product.FIREFOX;
|
if (productAddition.contains("firefox")) {
|
||||||
prodDesc = "firefox";
|
this.product = Product.FIREFOX;
|
||||||
} else if (productAddition.contains("netscape")) {
|
prodDesc = "firefox";
|
||||||
this.product = Product.NETSCAPE;
|
} else if (productAddition.contains("netscape")) {
|
||||||
prodDesc = "netscape";
|
this.product = Product.NETSCAPE;
|
||||||
} else if (productAddition.contains("camino")) {
|
prodDesc = "netscape";
|
||||||
this.product = Product.CAMINO;
|
} else if (productAddition.contains("camino")) {
|
||||||
prodDesc = "camino";
|
this.product = Product.CAMINO;
|
||||||
} else {
|
prodDesc = "camino";
|
||||||
this.product = Product.MOZILLA;
|
} else {
|
||||||
// @todo: How it can get the mozilla vesion?
|
this.product = Product.MOZILLA;
|
||||||
}
|
// @todo: How it can get the mozilla vesion?
|
||||||
|
}
|
||||||
// Now, parse product version ...
|
|
||||||
if (prodDesc != null) {
|
// Now, parse product version ...
|
||||||
int sI = productAddition.indexOf(prodDesc) + prodDesc.length() + 1;
|
if (prodDesc != null) {
|
||||||
int eI = productAddition.indexOf(' ', sI);
|
int sI = productAddition.indexOf(prodDesc) + prodDesc.length() + 1;
|
||||||
if (eI == -1) {
|
int eI = productAddition.indexOf(' ', sI);
|
||||||
eI = productAddition.length();
|
if (eI == -1) {
|
||||||
}
|
eI = productAddition.length();
|
||||||
|
}
|
||||||
final String productVersion = productAddition.substring(sI, eI);
|
|
||||||
parseVersion(productVersion);
|
final String productVersion = productAddition.substring(sI, eI);
|
||||||
|
parseVersion(productVersion);
|
||||||
}
|
|
||||||
|
}
|
||||||
} else if (userAgentHeader.contains("Opera")) {
|
|
||||||
// Opera:
|
} else if (userAgentHeader.contains("Opera")) {
|
||||||
// Samples: Opera/9.0 (Windows NT 5.1; U; en)
|
// Opera:
|
||||||
// Opera/8.5 (Macintosh; PPC Mac OS X; U; en)
|
// Samples: Opera/9.0 (Windows NT 5.1; U; en)
|
||||||
// Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en) Opera 8.5
|
// Opera/8.5 (Macintosh; PPC Mac OS X; U; en)
|
||||||
// Mozilla/4.0 (compatible; MSIE 6.0; Mac_PowerPC Mac OS X; en) Opera 8.5
|
// Mozilla/5.0 (Macintosh; PPC Mac OS X; U; en) Opera 8.5
|
||||||
// Opera/9.21 (Windows NT 5.1; U; en)
|
// Mozilla/4.0 (compatible; MSIE 6.0; Mac_PowerPC Mac OS X; en) Opera 8.5
|
||||||
this.product = Product.OPERA;
|
// Opera/9.21 (Windows NT 5.1; U; en)
|
||||||
String productVersion;
|
this.product = Product.OPERA;
|
||||||
if (application.startsWith("Opera")) {
|
String productVersion;
|
||||||
productVersion = application.substring(application.indexOf('/') + 1, application.length());
|
if (application.startsWith("Opera")) {
|
||||||
|
productVersion = application.substring(application.indexOf('/') + 1, application.length());
|
||||||
} else {
|
|
||||||
productVersion = productAddition.substring(application.lastIndexOf(' ') + 1, application.length());
|
} else {
|
||||||
}
|
productVersion = productAddition.substring(application.lastIndexOf(' ') + 1, application.length());
|
||||||
parseVersion(productVersion);
|
}
|
||||||
|
parseVersion(productVersion);
|
||||||
} else if (userAgentHeader.contains("4.7")) {
|
|
||||||
this.product = Product.NETSCAPE;
|
} else if (userAgentHeader.contains("4.7")) {
|
||||||
} else if (userAgentHeader.contains("Lynx")) {
|
this.product = Product.NETSCAPE;
|
||||||
this.product = Product.LYNX;
|
} else if (userAgentHeader.contains("Lynx")) {
|
||||||
} else {
|
this.product = Product.LYNX;
|
||||||
// It's a robot ..
|
} else {
|
||||||
for (String botAgent : botAgents) {
|
// It's a robot ..
|
||||||
if (userAgentHeader.contains(botAgent)) {
|
for (String botAgent : botAgents) {
|
||||||
// set a key in the session, so the next time we don't have to manually
|
if (userAgentHeader.contains(botAgent)) {
|
||||||
// detect the robot again
|
// set a key in the session, so the next time we don't have to manually
|
||||||
this.product = Product.ROBOT;
|
// detect the robot again
|
||||||
break;
|
this.product = Product.ROBOT;
|
||||||
}
|
break;
|
||||||
}
|
}
|
||||||
logger.info("UserAgent could not be detected: '" + userAgentHeader + "'");
|
}
|
||||||
}
|
logger.info("UserAgent could not be detected: '" + userAgentHeader + "'");
|
||||||
} catch (Throwable e) {
|
}
|
||||||
logger.error("Could not detect the browser based on the user agent: '" + userAgentHeader + "'");
|
} catch (Throwable e) {
|
||||||
// Mark as an unsupported browser...
|
logger.error("Could not detect the browser based on the user agent: '" + userAgentHeader + "'");
|
||||||
this.product = Product.ROBOT;
|
// Mark as an unsupported browser...
|
||||||
}
|
this.product = Product.ROBOT;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
private OS parseOS(String details) {
|
|
||||||
OS result;
|
private OS parseOS(String details) {
|
||||||
if (details.contains("Windows"))
|
OS result;
|
||||||
result = OS.WINDOWS;
|
if (details.contains("Windows"))
|
||||||
else if (details.contains("Mac") || details.contains("Macintosh"))
|
result = OS.WINDOWS;
|
||||||
result = OS.MAC;
|
else if (details.contains("Mac") || details.contains("Macintosh"))
|
||||||
else if (details.contains("X11"))
|
result = OS.MAC;
|
||||||
result = OS.LINUX;
|
else if (details.contains("X11"))
|
||||||
else
|
result = OS.LINUX;
|
||||||
result = OS.KNOWN;
|
else
|
||||||
return result;
|
result = OS.KNOWN;
|
||||||
}
|
return result;
|
||||||
|
}
|
||||||
public static UserAgent create(final HttpServletRequest request) {
|
|
||||||
final String userAgent = request.getHeader(USER_AGENT_HEADER);
|
public static UserAgent create(final HttpServletRequest request) {
|
||||||
return new UserAgent(userAgent);
|
final String userAgent = request.getHeader(USER_AGENT_HEADER);
|
||||||
}
|
return new UserAgent(userAgent);
|
||||||
|
}
|
||||||
|
|
||||||
public static UserAgent create(final String userAgent) {
|
|
||||||
return new UserAgent(userAgent);
|
public static UserAgent create(final String userAgent) {
|
||||||
}
|
return new UserAgent(userAgent);
|
||||||
|
}
|
||||||
private void parseVersion(final String version) {
|
|
||||||
final int index = version.indexOf('.');
|
private void parseVersion(final String version) {
|
||||||
final String vm = version.substring(0, index);
|
final int index = version.indexOf('.');
|
||||||
final String vv = version.substring(index + 1, version.length());
|
final String vm = version.substring(0, index);
|
||||||
this.versionMajor = Integer.parseInt(vm);
|
final String vv = version.substring(index + 1, version.length());
|
||||||
char c = vv.charAt(0);
|
this.versionMajor = Integer.parseInt(vm);
|
||||||
this.versionVariation = Integer.valueOf(String.valueOf(c));
|
char c = vv.charAt(0);
|
||||||
}
|
this.versionVariation = Integer.valueOf(String.valueOf(c));
|
||||||
|
}
|
||||||
/**
|
|
||||||
* All known robot user-agent headers (list can be found
|
/**
|
||||||
* <a href="http://www.robotstxt.org/wc/activel">here</a>).
|
* All known robot user-agent headers (list can be found
|
||||||
* <p/>
|
* <a href="http://www.robotstxt.org/wc/activel">here</a>).
|
||||||
* <p>NOTE: To avoid bad detection:</p>
|
* <p/>
|
||||||
* <p/>
|
* <p>NOTE: To avoid bad detection:</p>
|
||||||
* <ul>
|
* <p/>
|
||||||
* <li>Robots with ID of 2 letters only were removed</li>
|
* <ul>
|
||||||
* <li>Robot called "webs" were removed</li>
|
* <li>Robots with ID of 2 letters only were removed</li>
|
||||||
* <li>directhit was changed in direct_hit (its real id)</li>
|
* <li>Robot called "webs" were removed</li>
|
||||||
* </ul>
|
* <li>directhit was changed in direct_hit (its real id)</li>
|
||||||
*/
|
* </ul>
|
||||||
private static final String[] botAgents = {
|
*/
|
||||||
"acme.spider", "ahoythehomepagefinder", "alkaline", "appie", "arachnophilia",
|
private static final String[] botAgents = {
|
||||||
"architext", "aretha", "ariadne", "aspider", "atn.txt", "atomz", "auresys",
|
"acme.spider", "ahoythehomepagefinder", "alkaline", "appie", "arachnophilia",
|
||||||
"backrub", "bigbrother", "bjaaland", "blackwidow", "blindekuh", "bloodhound",
|
"architext", "aretha", "ariadne", "aspider", "atn.txt", "atomz", "auresys",
|
||||||
"brightnet", "bspider", "cactvschemistryspider", "calif", "cassandra",
|
"backrub", "bigbrother", "bjaaland", "blackwidow", "blindekuh", "bloodhound",
|
||||||
"cgireader", "checkbot", "churl", "cmc", "collective", "combine", "conceptbot",
|
"brightnet", "bspider", "cactvschemistryspider", "calif", "cassandra",
|
||||||
"core", "cshkust", "cusco", "cyberspyder", "deweb", "dienstspider", "diibot",
|
"cgireader", "checkbot", "churl", "cmc", "collective", "combine", "conceptbot",
|
||||||
"direct_hit", "dnabot", "download_express", "dragonbot", "dwcp", "ebiness",
|
"core", "cshkust", "cusco", "cyberspyder", "deweb", "dienstspider", "diibot",
|
||||||
"eit", "emacs", "emcspider", "esther", "evliyacelebi", "fdse", "felix",
|
"direct_hit", "dnabot", "download_express", "dragonbot", "dwcp", "ebiness",
|
||||||
"ferret", "fetchrover", "fido", "finnish", "fireball", "fish", "fouineur",
|
"eit", "emacs", "emcspider", "esther", "evliyacelebi", "fdse", "felix",
|
||||||
"francoroute", "freecrawl", "funnelweb", "gazz", "gcreep", "getbot", "geturl",
|
"ferret", "fetchrover", "fido", "finnish", "fireball", "fish", "fouineur",
|
||||||
"golem", "googlebot", "grapnel", "griffon", "gromit", "gulliver", "hambot",
|
"francoroute", "freecrawl", "funnelweb", "gazz", "gcreep", "getbot", "geturl",
|
||||||
"harvest", "havindex", "hometown", "wired-digital", "htdig", "htmlgobble",
|
"golem", "googlebot", "grapnel", "griffon", "gromit", "gulliver", "hambot",
|
||||||
"hyperdecontextualizer", "ibm", "iconoclast", "ilse", "imagelock", "incywincy",
|
"harvest", "havindex", "hometown", "wired-digital", "htdig", "htmlgobble",
|
||||||
"informant", "infoseek", "infoseeksidewinder", "infospider", "inspectorwww",
|
"hyperdecontextualizer", "ibm", "iconoclast", "ilse", "imagelock", "incywincy",
|
||||||
"intelliagent", "iron33", "israelisearch", "javabee", "jcrawler", "jeeves",
|
"informant", "infoseek", "infoseeksidewinder", "infospider", "inspectorwww",
|
||||||
"jobot", "joebot", "jubii", "jumpstation", "katipo", "kdd", "kilroy",
|
"intelliagent", "iron33", "israelisearch", "javabee", "jcrawler", "jeeves",
|
||||||
"ko_yappo_robot", "labelgrabber.txt", "larbin", "legs", "linkscan",
|
"jobot", "joebot", "jubii", "jumpstation", "katipo", "kdd", "kilroy",
|
||||||
"linkwalker", "lockon", "logo_gif", "lycos", "macworm", "magpie", "mediafox",
|
"ko_yappo_robot", "labelgrabber.txt", "larbin", "legs", "linkscan",
|
||||||
"merzscope", "meshexplorer", "mindcrawler", "moget", "momspider", "monster",
|
"linkwalker", "lockon", "logo_gif", "lycos", "macworm", "magpie", "mediafox",
|
||||||
"motor", "muscatferret", "mwdsearch", "myweb", "netcarta", "netmechanic",
|
"merzscope", "meshexplorer", "mindcrawler", "moget", "momspider", "monster",
|
||||||
"netscoop", "newscan-online", "nhse", "nomad", "northstar", "nzexplorer",
|
"motor", "muscatferret", "mwdsearch", "myweb", "netcarta", "netmechanic",
|
||||||
"occam", "octopus", "orb_search", "packrat", "pageboy", "parasite", "patric",
|
"netscoop", "newscan-online", "nhse", "nomad", "northstar", "nzexplorer",
|
||||||
"perignator", "perlcrawler", "phantom", "piltdownman", "pioneer", "pitkow",
|
"occam", "octopus", "orb_search", "packrat", "pageboy", "parasite", "patric",
|
||||||
"pjspider", "pka", "plumtreewebaccessor", "poppi", "portalb", "puu", "python",
|
"perignator", "perlcrawler", "phantom", "piltdownman", "pioneer", "pitkow",
|
||||||
"raven", "rbse", "resumerobot", "rhcs", "roadrunner", "robbie", "robi",
|
"pjspider", "pka", "plumtreewebaccessor", "poppi", "portalb", "puu", "python",
|
||||||
"roverbot", "safetynetrobot", "scooter", "search_au", "searchprocess",
|
"raven", "rbse", "resumerobot", "rhcs", "roadrunner", "robbie", "robi",
|
||||||
"senrigan", "sgscout", "shaggy", "shaihulud", "sift", "simbot", "site-valet",
|
"roverbot", "safetynetrobot", "scooter", "search_au", "searchprocess",
|
||||||
"sitegrabber", "sitetech", "slurp", "smartspider", "snooper", "solbot",
|
"senrigan", "sgscout", "shaggy", "shaihulud", "sift", "simbot", "site-valet",
|
||||||
"spanner", "speedy", "spider_monkey", "spiderbot", "spiderman", "spry",
|
"sitegrabber", "sitetech", "slurp", "smartspider", "snooper", "solbot",
|
||||||
"ssearcher", "suke", "sven", "tach_bw", "tarantula", "tarspider", "tcl",
|
"spanner", "speedy", "spider_monkey", "spiderbot", "spiderman", "spry",
|
||||||
"techbot", "templeton", "titin", "titan", "tkwww", "tlspider", "ucsd",
|
"ssearcher", "suke", "sven", "tach_bw", "tarantula", "tarspider", "tcl",
|
||||||
"udmsearch", "urlck", "valkyrie", "victoria", "visionsearch", "voyager",
|
"techbot", "templeton", "titin", "titan", "tkwww", "tlspider", "ucsd",
|
||||||
"vwbot", "w3index", "w3m2", "wanderer", "webbandit", "webcatcher", "webcopy",
|
"udmsearch", "urlck", "valkyrie", "victoria", "visionsearch", "voyager",
|
||||||
"webfetcher", "webfoot", "weblayers", "weblinker", "webmirror", "webmoose",
|
"vwbot", "w3index", "w3m2", "wanderer", "webbandit", "webcatcher", "webcopy",
|
||||||
"webquest", "webreader", "webreaper", "websnarf", "webspider", "webvac",
|
"webfetcher", "webfoot", "weblayers", "weblinker", "webmirror", "webmoose",
|
||||||
"webwalk", "webwalker", "webwatch", "wget", "whowhere", "wmir", "wolp",
|
"webquest", "webreader", "webreaper", "websnarf", "webspider", "webvac",
|
||||||
"wombat", "worm", "wwwc", "wz101", "xget", "nederland.zoek"
|
"webwalk", "webwalker", "webwatch", "wget", "whowhere", "wmir", "wolp",
|
||||||
};
|
"wombat", "worm", "wwwc", "wz101", "xget", "nederland.zoek"
|
||||||
|
};
|
||||||
public boolean isBrowserSupported() {
|
|
||||||
// Is it a supported browser ?.
|
public boolean isBrowserSupported() {
|
||||||
final UserAgent.Product product = this.getProduct();
|
// Is it a supported browser ?.
|
||||||
boolean result = product == UserAgent.Product.FIREFOX && this.isVersionGreatedOrEqualThan(12, 0);
|
final UserAgent.Product product = this.getProduct();
|
||||||
result = result || product == UserAgent.Product.EXPLORER && this.isVersionGreatedOrEqualThan(7, 0) && this.getOs() == UserAgent.OS.WINDOWS;
|
boolean result = product == UserAgent.Product.FIREFOX && this.isVersionGreatedOrEqualThan(12, 0);
|
||||||
result = result || product == UserAgent.Product.OPERA && this.isVersionGreatedOrEqualThan(11, 0);
|
result = result || product == UserAgent.Product.EXPLORER && this.isVersionGreatedOrEqualThan(7, 0) && this.getOs() == UserAgent.OS.WINDOWS;
|
||||||
result = result || product == UserAgent.Product.CHROME && this.isVersionGreatedOrEqualThan(19, 0);
|
result = result || product == UserAgent.Product.OPERA && this.isVersionGreatedOrEqualThan(11, 0);
|
||||||
result = result || product == UserAgent.Product.SAFARI && this.isVersionGreatedOrEqualThan(5, 0);
|
result = result || product == UserAgent.Product.CHROME && this.isVersionGreatedOrEqualThan(19, 0);
|
||||||
result = result || product == Product.WEB_CRAWLER;
|
result = result || product == UserAgent.Product.SAFARI && this.isVersionGreatedOrEqualThan(5, 0);
|
||||||
|
result = result || product == Product.WEB_CRAWLER;
|
||||||
return result;
|
|
||||||
}
|
return result;
|
||||||
|
}
|
||||||
public boolean needsGCF() {
|
|
||||||
final UserAgent.Product product = this.getProduct();
|
public boolean needsGCF() {
|
||||||
return product == UserAgent.Product.EXPLORER && this.isVersionLessThan(9) && this.getOs() == UserAgent.OS.WINDOWS && !this.hasGCFInstalled;
|
final UserAgent.Product product = this.getProduct();
|
||||||
}
|
return product == UserAgent.Product.EXPLORER && this.isVersionLessThan(9) && this.getOs() == UserAgent.OS.WINDOWS && !this.hasGCFInstalled;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user