|
|
|
@ -1280,12 +1280,13 @@ LangType FileManager::detectLanguageFromTextBegining(const unsigned char *data,
|
|
|
|
|
// First test for a Unix-like Shebang
|
|
|
|
|
// See https://en.wikipedia.org/wiki/Shebang_%28Unix%29 for more details about Shebang
|
|
|
|
|
std::string shebang = "#!";
|
|
|
|
|
auto res = std::mismatch(shebang.begin(), shebang.end(), buf2Test.begin());
|
|
|
|
|
if (res.first == shebang.end())
|
|
|
|
|
|
|
|
|
|
size_t foundPos = buf2Test.find(shebang);
|
|
|
|
|
if (foundPos == 0)
|
|
|
|
|
{
|
|
|
|
|
// Make a list of the most commonly used languages
|
|
|
|
|
const size_t SHEBANG_LANGUAGES = 6;
|
|
|
|
|
FirstLineLanguages ShebangLangs[SHEBANG_LANGUAGES] = {
|
|
|
|
|
const size_t NB_SHEBANG_LANGUAGES = 6;
|
|
|
|
|
FirstLineLanguages ShebangLangs[NB_SHEBANG_LANGUAGES] = {
|
|
|
|
|
{ "sh", L_BASH },
|
|
|
|
|
{ "python", L_PYTHON },
|
|
|
|
|
{ "perl", L_PERL },
|
|
|
|
@ -1295,7 +1296,7 @@ LangType FileManager::detectLanguageFromTextBegining(const unsigned char *data,
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// Go through the list of languages
|
|
|
|
|
for (i = 0; i < SHEBANG_LANGUAGES; ++i)
|
|
|
|
|
for (i = 0; i < NB_SHEBANG_LANGUAGES; ++i)
|
|
|
|
|
{
|
|
|
|
|
if (buf2Test.find(ShebangLangs[i].pattern) != std::string::npos)
|
|
|
|
|
{
|
|
|
|
@ -1308,18 +1309,19 @@ LangType FileManager::detectLanguageFromTextBegining(const unsigned char *data,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Are there any other patterns we know of?
|
|
|
|
|
const size_t FIRST_LINE_LANGUAGES = 4;
|
|
|
|
|
FirstLineLanguages languages[FIRST_LINE_LANGUAGES] = {
|
|
|
|
|
const size_t NB_FIRST_LINE_LANGUAGES = 5;
|
|
|
|
|
FirstLineLanguages languages[NB_FIRST_LINE_LANGUAGES] = {
|
|
|
|
|
{ "<?xml", L_XML },
|
|
|
|
|
{ "<?php", L_PHP },
|
|
|
|
|
{ "<html", L_HTML },
|
|
|
|
|
{ "<!DOCTYPE html", L_HTML }
|
|
|
|
|
{ "<!DOCTYPE html", L_HTML },
|
|
|
|
|
{ "<?", L_PHP } // MUST be after "<?php" and "<?xml" to get the result as accurate as possible
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < FIRST_LINE_LANGUAGES; ++i)
|
|
|
|
|
for (i = 0; i < NB_FIRST_LINE_LANGUAGES; ++i)
|
|
|
|
|
{
|
|
|
|
|
res = std::mismatch(languages[i].pattern.begin(), languages[i].pattern.end(), buf2Test.begin());
|
|
|
|
|
if (res.first == languages[i].pattern.end())
|
|
|
|
|
foundPos = buf2Test.find(languages[i].pattern);
|
|
|
|
|
if (foundPos == 0)
|
|
|
|
|
{
|
|
|
|
|
return languages[i].lang;
|
|
|
|
|
}
|
|
|
|
|