Pelzini

This is the code documentation for the Pelzini project

source of /processor/php_parser.php

PHP Parser
  1. <?php
  2. /*
  3. Copyright 2008 Josh Heidenreich
  4.  
  5. This file is part of Pelzini.
  6.  
  7. Pelzini is free software: you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation, either version 3 of the License, or
  10. (at your option) any later version.
  11.  
  12. Pelzini is distributed in the hope that it will be useful,
  13. but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. GNU General Public License for more details.
  16.  
  17. You should have received a copy of the GNU General Public License
  18. along with Pelzini. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20.  
  21.  
  22. /**
  23.  * PHP Parser
  24.  * @package Parsers
  25.  * @author Josh Heidenreich
  26.  * @since 0.1
  27.  **/
  28.  
  29. /**
  30.  * This is the parser for PHP files. It converts a file from the raw PHP into a document tree
  31.  *
  32.  * @todo Convert to new parser system (e.g. Javascript parser)
  33.  * @todo Add support for namespaces, even on PHP < 5.3
  34.  **/
  class PhpParser
  {

      /**
       * Parses a specific file
       *
       * Reads $base_dir . $filename, tokenises it with token_get_all() and walks the
       * token stream once. Functions, classes, interfaces, class variables, define()
       * constants and the namespace are collected into a ParserFile as they are seen.
       * Doc comments are attached to the next function, class, interface, class
       * variable or define() that follows them; see the T_DOC_COMMENT case for when
       * a doc comment is applied to the file itself.
       *
       * @param string $base_dir Directory prefix; concatenated directly with $filename
       * @param string $filename File name appended to $base_dir; also stored as the ParserFile name
       * @return ParserFile|null The populated file, or NULL if the file could not be read or is empty
       **/
      public function parseFile($base_dir, $filename)
      {
          // You can enable the following if you want to debug the parser
          // If you enable the line after it (the 'strpos' line) instead,
          // it will only debug files containing 'test' (e.g. 'php_test.php')
          $debug = false;
          // if (strpos ($filename, 'test') !== false) $debug = true;

          // The loose comparison means a read failure (false) and an empty
          // file ('') both end up returning null.
          $source = @file_get_contents($base_dir . $filename);
          if ($source == null) return null;

          $tokens = @token_get_all($source);


          $current_file = new ParserFile ();
          $current_file->name = $filename;
          $current_file->source = $source;

          unset ($source);

          // the vars that make it tick
          $current_function = null;   // function whose signature is being read (before its '{' or ';')
          $inside_function = null;    // function whose body we are currently inside
          $current_class = null;      // class/interface whose header is being read (before its '{')
          $inside_class = null;       // class/interface whose body we are currently inside
          $current_constant = null;   // define() call being collected
          $next = null;               // id of an extends/implements/namespace token awaiting its name
          $namespace = 0;             // 0 = none seen, 1 = collecting name parts, 2 = finished
          $brace_count = 0;           // depth of braces that do not open a function or class
          $abstract = false;
          $static = false;
          $final = false;
          $next_comment = null;       // most recent doc comment not yet attached to anything
          $file_has_comment = false;
          $return = false;            // true when the previous token was a 'return' inside a function
          $byref = false;             // true when a '&' was seen before the next argument
          $prev_id = null;            // id of the previous non-whitespace token (null for bare text)

          // debugger
          if ($debug) {
              echo '<style>';
              echo 'span {color: green;}';
              echo 'h3 {border: 4px black solid; padding: 3px; margin-top: 2em;}';
              echo 'i {color: gray;}';
              echo '</style>';

              echo '<h3>', htmlspecialchars($filename), '</h3>';
              echo '<pre>';
          }

          $argument = null;           // last argument seen; receives default values
          $visibility = null;
          $param_type = null;
          foreach ($tokens as $token) {
              if (is_array($token) && $token[0] == T_WHITESPACE) continue;

              // debugger
              if ($debug) {
                  echo "\n";
                  if (is_string($token)) {
                      echo "BARE TEXT\n<i>" . htmlspecialchars($token) . "</i>\n";
                  } else {
                      echo htmlspecialchars(token_name($token[0])) . "\n<i>" . htmlspecialchars(str_replace("\n", '\n', $token[1])) . "</i>\n";
                  }
              }

              // The token directly after 'return' decides whether a value is
              // returned: anything other than ';' counts as a return statement.
              if ($return) {
                  if ($token !== ';') {
                      $inside_function->has_return_stmt = true;
                  }
                  $return = false;
              }

              if (is_string($token)) {
                  // opening of a function or class block
                  if ($token == '{') {
                      // opening of function
                      if ($current_function != null) {
                          if ($inside_class != null) {
                              if ($visibility != null) {
                                  $current_function->visibility = $visibility;
                                  $visibility = null;
                              }
                              $inside_class->functions[] = $current_function;

                          } else {
                              $current_file->functions[] = $current_function;
                          }

                          $current_function->post_load();
                          $inside_function = $current_function;
                          $current_function = null;
                          $argument = null;

                      // opening of class
                      } else if ($current_class != null) {
                          if ($visibility != null) {
                              $current_class->visibility = $visibility;
                              $visibility = null;
                          }
                          $current_file->classes[] = $current_class;
                          $inside_class = $current_class;
                          $current_class = null;
                          $next = null;

                      } else {
                          $brace_count++;
                      }


                  // end of a namespace statement, or of a function
                  // declaration without a body (interface / abstract method)
                  } else if ($token == ';') {
                      if ($namespace == 1) {
                          $namespace = 2;
                      } else if ($current_function != null) {
                          if ($visibility != null) {
                              $current_function->visibility = $visibility;
                              $visibility = null;
                          }
                          // A body-less 'function' outside of a class (for example
                          // 'use function Foo\bar;') is not a declaration and has
                          // no class to attach to, so it is discarded.
                          if ($inside_class != null) {
                              $current_function->post_load();
                              $inside_class->functions[] = $current_function;
                          }
                          $current_function = null;
                          // Same reset as when a body opens, so that later literals
                          // are not recorded as this function's argument default.
                          $argument = null;
                      }


                  // closing of a class or function block
                  } else if ($token == '}') {
                      if ($brace_count == 0) {
                          if ($inside_function != null) {
                              $inside_function = null;
                          } else if ($inside_class != null) {
                              $inside_class = null;
                          }

                      } else {
                          $brace_count--;
                      }


                  } else if ($token == '&') {
                      if ($current_function != null) {
                          $byref = true;
                      }
                  }


              } else {
                  // token array
                  list($id, $text, $linenum) = $token;

                  switch ($id) {
                      // '{$' and '${' inside strings are closed by a bare '}'
                      // token, so they have to be counted as opening braces
                      case T_CURLY_OPEN:
                      case T_DOLLAR_OPEN_CURLY_BRACES:
                          $brace_count++;
                          break;


                      // If an earlier doc comment is still unassigned when the
                      // next one arrives, the earlier one goes to the file
                      // (once only)
                      case T_DOC_COMMENT:
                          if ($next_comment && ! $file_has_comment) {
                              $current_file->applyComment($next_comment);
                              $next_comment = null;
                              $file_has_comment = true;
                          }
                          $next_comment = $text;
                          break;


                      case T_FUNCTION:
                          // functions declared inside a function body are ignored
                          if ($inside_function != null) {
                              break;
                          }
                          $current_function = new ParserFunction();
                          $current_function->linenum = $linenum;
                          if ($abstract) {
                              $current_function->abstract = true;
                              $abstract = false;
                          }
                          if ($static) {
                              $current_function->static = true;
                              $static = false;
                          }
                          if ($final) {
                              $current_function->final = true;
                              $final = false;
                          }
                          if ($next_comment) {
                              $current_function->applyComment($next_comment);
                              $next_comment = null;
                          }
                          $param_type = null;
                          break;


                      case T_CLASS:
                          // 'Foo::class' is a constant lookup, not a declaration
                          if ($prev_id === T_DOUBLE_COLON) {
                              break;
                          }
                          $current_class = new ParserClass();
                          $current_class->linenum = $linenum;
                          if ($abstract) {
                              $current_class->abstract = true;
                              $abstract = false;
                          } else if ($final) {
                              $current_class->final = true;
                              $final = false;
                          }
                          if ($next_comment) {
                              $current_class->applyComment($next_comment);
                              $next_comment = null;
                          }
                          break;


                      case T_INTERFACE:
                          $current_class = new ParserInterface();
                          $current_class->linenum = $linenum;
                          if ($next_comment) {
                              $current_class->applyComment($next_comment);
                              $next_comment = null;
                          }
                          break;


                      // variables are added according to scope
                      // will become a ParserVariable or a ParserArgument
                      case T_VARIABLE:
                          if ($current_function != null) {
                              $argument = new ParserArgument();
                              $argument->linenum = $linenum;
                              $argument->name = $text;
                              if ($param_type != null) {
                                  $argument->type = $param_type;
                                  $param_type = null;
                              }
                              $argument->byref = $byref;
                              $current_function->args[] = $argument;
                              $byref = false;

                          } else if (($inside_class != null) && ($inside_function == null)) {
                              $variable = new ParserVariable();
                              $variable->linenum = $linenum;
                              $variable->name = $text;
                              // NOTE(review): PHP itself treats a property without an
                              // explicit modifier as public; the 'private' fallback is
                              // kept as-is - confirm whether it is intentional.
                              $variable->visibility = $visibility ?: 'private';
                              $visibility = null;
                              if ($static) {
                                  $variable->static = true;
                                  $static = false;
                              }
                              if ($next_comment) {
                                  $variable->applyComment($next_comment);
                                  $next_comment = null;
                              }
                              $inside_class->variables[] = $variable;
                          }
                          break;


                      // A string may become an extends, implements
                      // function name or class name
                      // it could also be 'define' or 'null'
                      case T_STRING:
                          if ($next != null) {
                              if ($next == T_EXTENDS) {
                                  $current_class->extends = $text;
                              } else if ($next == T_IMPLEMENTS) {
                                  $current_class->implements[] = $text;
                                  // leave $next set so that every name in a
                                  // comma-separated implements list is recorded
                                  break;
                              } else if ($next == T_NAMESPACE && $namespace == 0) {
                                  $current_file->namespace = array($text);
                                  $namespace = 1;
                              } else if ($next == T_NS_SEPARATOR && $namespace == 1) {
                                  $current_file->namespace[] = $text;
                              }
                              $next = null;

                          } else if (strcasecmp($text, 'null') == 0) {
                              if ($current_constant) {
                                  $current_constant->value = 'NULL';
                                  $current_file->constants[] = $current_constant;
                                  $current_constant = null;

                              } else if ($argument) {
                                  $argument->default = 'NULL';
                              }

                          } else if (strcasecmp($text, 'true') == 0) {
                              if ($current_constant) {
                                  $current_constant->value = 'TRUE';
                                  $current_file->constants[] = $current_constant;
                                  $current_constant = null;

                              } else if ($argument) {
                                  $argument->default = 'TRUE';
                              }

                          } else if (strcasecmp($text, 'false') == 0) {
                              if ($current_constant) {
                                  $current_constant->value = 'FALSE';
                                  $current_file->constants[] = $current_constant;
                                  $current_constant = null;

                              } else if ($argument) {
                                  $argument->default = 'FALSE';
                              }

                          } else if ($current_function != null) {
                              // first name after 'function' is the function name,
                              // any later one is a type hint for the next argument
                              if ($current_function->name == '') {
                                  $current_function->name = $text;
                              } else {
                                  $param_type = $text;
                              }

                          } else if ($current_class != null) {
                              $current_class->name = $text;

                          } else if (strcasecmp($text, 'define') == 0) {
                              $current_constant = new ParserConstant();
                              $current_constant->linenum = $linenum;
                              if ($next_comment) {
                                  $current_constant->applyComment($next_comment);
                                  $next_comment = null;
                              }

                          }
                          break;


                      // 'array' used as a type hint in a function signature
                      case T_ARRAY:
                          if ($current_function != null) {
                              $param_type = $text;
                          }
                          break;


                      case T_CONSTANT_ENCAPSED_STRING:
                          // removes quotes, etc
                          $name_search = array("\'", '\"', "'", '"');
                          $name_replace = array("'", '"', '', '');
                          $text = str_replace($name_search, $name_replace, $text);

                          // first string inside define() is the constant name,
                          // the second one is its value
                          if ($current_constant) {
                              if ($current_constant->name == null) {
                                  $current_constant->name = $text;
                              } else {
                                  $current_constant->value = $text;
                                  $current_file->constants[] = $current_constant;
                                  $current_constant = null;
                              }

                          } else if ($argument) {
                              $argument->default = $text;
                          }
                          break;


                      case T_LNUMBER:
                      case T_DNUMBER:
                          if ($current_constant) {
                              if ($current_constant->name != null) {
                                  $current_constant->value = $text;
                                  $current_file->constants[] = $current_constant;
                                  $current_constant = null;
                              }

                          } else if ($argument) {
                              $argument->default = $text;
                          }
                          break;


                      // visibility
                      case T_PRIVATE:
                          $visibility = 'private';
                          break;

                      case T_PROTECTED:
                          $visibility = 'protected';
                          break;

                      case T_PUBLIC:
                          $visibility = 'public';
                          break;


                      // the next token after one of these does the grunt work
                      case T_EXTENDS:
                      case T_IMPLEMENTS:
                      case T_NAMESPACE:
                      case T_NS_SEPARATOR:
                          $next = $id;
                          break;

                      case T_ABSTRACT:
                          $abstract = true;
                          break;

                      case T_STATIC:
                          if (! $inside_function) $static = true;
                          break;

                      case T_FINAL:
                          $final = true;
                          break;

                      case T_RETURN:
                          if ($inside_function) {
                              $return = true;
                          }
                          break;

                  }
              }

              // Whitespace never reaches this point, so this is always the
              // previous significant token when the next one is examined.
              $prev_id = is_array($token) ? $token[0] : null;
          }

          // If there is a comment left that never got assigned,
          // assign it to the file
          if ($next_comment && ! $file_has_comment) {
              $current_file->applyComment($next_comment);
              $next_comment = null;
              $file_has_comment = true;
          }

          if ($debug) echo '</pre>';


          return $current_file;
      }


  }
  466.  
  467.  
  468. ?>
  469.